diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -15713,7 +15713,7 @@ SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; + CallingConv::ID &CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; EVT PtrVT = getPointerTy(DAG.getDataLayout()); MVT XLenVT = Subtarget.getXLenVT(); @@ -15731,6 +15731,28 @@ CallConv == CallingConv::Fast ? RISCV::CC_RISCV_FastCC : RISCV::CC_RISCV); + // Assign locations to each value returned by this call. + SmallVector<CCValAssign, 16> RVLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); + analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); + + // Check callee args/returns for RVV registers and set calling convention + // accordingly. + if (CallConv == CallingConv::C || CallConv == CallingConv::Fast) { + auto HasRVVRegLoc = [](CCValAssign &Loc) { + if (!Loc.isRegLoc()) + return false; + + const auto RegClasses = {&RISCV::VRRegClass, &RISCV::VRM2RegClass, + &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}; + return any_of(RegClasses, [&](const auto *RC) { return RC->contains(Loc.getLocReg()); }); + }; + if (any_of(RVLocs, HasRVVRegLoc) || any_of(ArgLocs, HasRVVRegLoc)) { + CallConv = CallingConv::RISCV_VectorCall; + } + } + // Check if it's really possible to do a tail call. if (IsTailCall) IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs); @@ -15977,11 +15999,6 @@ Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, Glue, DL); Glue = Chain.getValue(1); - // Assign locations to each value returned by this call. - SmallVector<CCValAssign, 16> RVLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext()); - analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true, RISCV::CC_RISCV); - - // Copy all of the result registers out of their specified physreg.
for (auto &VA : RVLocs) { // Copy the value out diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -67,7 +67,8 @@ } bool HasVectorCSR = - MF->getFunction().getCallingConv() == CallingConv::RISCV_VectorCall; + MF->getFunction().getCallingConv() == CallingConv::RISCV_VectorCall || + MF->getInfo<RISCVMachineFunctionInfo>()->isVectorCall(); switch (Subtarget.getTargetABI()) { default: diff --git a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/abs-vp.ll @@ -610,9 +610,15 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -649,6 +655,12 @@ ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -659,6 +671,52 @@ define <vscale x 16 x i64> @vp_abs_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_abs_nxv16i64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1,
vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -674,6 +732,50 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v24, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB -; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv32 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+v,+m -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB define <vscale x 1 x i8> @bitreverse_nxv1i8(<vscale x 1 x i8> %va) { ; CHECK-LABEL: bitreverse_nxv1i8: @@ -985,6 +985,52 @@ define <vscale x 16 x i32> @bitreverse_nxv16i32(<vscale x 16 x i32> %va) { ; RV32-LABEL:
bitreverse_nxv16i32: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: lui a0, 16 @@ -1018,10 +1064,100 @@ ; RV32-NEXT: vand.vx v8, v8, a0 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv16i32: ; RV64: # %bb.0: 
+; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 8 ; RV64-NEXT: lui a0, 16 @@ -1055,6 +1191,50 @@ ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv16i32: @@ -1356,23 +1536,44 @@ define <vscale x 4 x i64> @bitreverse_nxv4i64(<vscale x 4 x i64> 
%va) { ; RV32-LABEL: bitreverse_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a0, 1044480 -; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a0, 44(sp) +; RV32-NEXT: sw a0, 40(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: sw a0, 36(sp) +; RV32-NEXT: sw a0, 32(sp) ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v12, v8, a0 @@ -1380,7 +1581,7 @@ ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: lui a2, 16 ; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: mv a3, sp +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vlse64.v v20, (a3), zero ; RV32-NEXT: vand.vx v16, v16, a2 ; RV32-NEXT: vor.vv v12, v16, v12 @@ -1400,21 +1601,21 @@ ; RV32-NEXT: vand.vv v8, v8, v20 ; RV32-NEXT: vsll.vi v8, v8, 8 ; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: addi a0, sp, 24 +; RV32-NEXT: addi a0, sp, 40 ; RV32-NEXT: vlse64.v v20, (a0), zero ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 ; RV32-NEXT: vand.vv v12, v12, v20 ; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 32 ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 @@ -1423,7 +1624,27 @@ ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; 
RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv4i64: @@ -1502,9 +1723,98 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 16(sp) @@ -1578,13 +1888,189 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: bitreverse_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, 
sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-NEXT: lui a0, 4080 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a0 @@ -1641,6 +2127,91 @@ ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 
+; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bitreverse_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -2017,6 +2017,52 @@ define <vscale x 16 x i32> @vp_bitreverse_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv16i32: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1,
vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: lui a0, 16 @@ -2050,10 +2096,100 @@ ; RV32-NEXT: vand.vx v8, v8, a0, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv16i32: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; 
RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV64-NEXT: lui a0, 16 @@ -2087,6 +2223,50 @@ ; RV64-NEXT: vand.vx v8, v8, a0, v0.t ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32: @@ -2101,6 +2281,52 @@ define <vscale x 16 x i32> @vp_bitreverse_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv16i32_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1,
sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: lui a0, 16 @@ -2134,10 +2360,100 @@ ; RV32-NEXT: vand.vx v8, v8, a0 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv16i32_unmasked: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi 
a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 8 ; RV64-NEXT: lui a0, 16 @@ -2171,6 +2487,50 @@ ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv16i32_unmasked: @@ -2797,23 +3157,44 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv4i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11,
0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v12, v8, a1, v0.t @@ -2827,7 +3208,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v20, v16, a4, v0.t ; RV32-NEXT: vsrl.vi v24, v8, 8, v0.t -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2846,7 +3227,7 @@ ; RV32-NEXT: vor.vv v8, v20, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 4, v0.t -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2855,7 +3236,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 4, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 2, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2864,7 +3245,7 @@ ; RV32-NEXT: vsll.vi v8, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t ; RV32-NEXT: vsrl.vi v12, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2872,7 +3253,27 @@ ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v12, v8, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; 
RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv4i64: @@ -2947,23 +3348,44 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv4i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 -; RV32-NEXT: sw zero, 4(sp) +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v12, v8, a1 @@ -2977,7 +3399,7 @@ ; RV32-NEXT: lui a4, 4080 ; RV32-NEXT: vand.vx v16, v16, a4 ; RV32-NEXT: vsrl.vi v20, v8, 8 -; RV32-NEXT: mv a5, sp +; RV32-NEXT: addi a5, sp, 16 ; RV32-NEXT: vsetvli a6, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -2996,7 +3418,7 @@ ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -3005,7 +3427,7 @@ ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -3014,7 +3436,7 @@ ; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -3022,7 +3444,27 @@ ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv4i64_unmasked: @@ -3104,10 +3546,57 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 30 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 29 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 27 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 26 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 25 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) @@ -3220,8 +3709,55 @@ ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 30 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 29 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 28 +; 
RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 27 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 26 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 25 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -3231,9 +3767,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1, v0.t @@ -3298,7 +3881,54 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded 
Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3318,9 +3948,98 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 22 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 21 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 20 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 19 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 18 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; 
RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 14 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 12 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 11 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 10 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) @@ -3402,26 +4121,202 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 48 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: li a0, 255 -; RV64-NEXT: slli a0, a0, 24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 -; RV64-NEXT: vor.vv v16, v16, v24 -; RV64-NEXT: li a2, 56 -; RV64-NEXT: vsll.vx v24, v8, a2 -; RV64-NEXT: lui a3, 16 -; RV64-NEXT: addiw a3, a3, -256 +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v7, (a0) 
# Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_bitreverse_nxv7i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; 
RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 4080 +; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: li a0, 255 +; RV64-NEXT: slli a0, a0, 24 +; RV64-NEXT: vand.vx v24, v8, a0 +; RV64-NEXT: vsll.vi v24, v24, 8 +; RV64-NEXT: vor.vv v16, v16, v24 +; RV64-NEXT: li a2, 56 +; RV64-NEXT: vsll.vx v24, v8, a2 +; RV64-NEXT: lui a3, 16 +; RV64-NEXT: addiw a3, a3, -256 ; RV64-NEXT: vand.vx v0, v8, a3 ; RV64-NEXT: li a4, 40 ; RV64-NEXT: vsll.vx v0, v0, a4 @@ -3465,6 +4360,91 @@ ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # 
Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv7i64_unmasked: @@ -3486,10 +4466,57 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 30 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 29 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 27 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 26 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 25 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 
1044480 ; RV32-NEXT: sw a1, 16(sp) @@ -3602,8 +4629,55 @@ ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 30 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 29 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 28 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 27 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 26 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 25 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -3613,9 +4687,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded 
Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1, v0.t @@ -3680,7 +4801,54 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -3700,9 +4868,98 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 22 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 21 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 20 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 19 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, 
a1, 48 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 18 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 14 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 12 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 11 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 10 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 16(sp) @@ -3784,13 +5041,189 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; 
RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv8i64_unmasked: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV64-NEXT: 
csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1 @@ -3847,6 +5280,91 @@ ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # 
Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv8i64_unmasked: @@ -3869,9 +5387,15 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 18 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 @@ -3947,6 +5471,12 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -3956,9 +5486,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 18 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 @@ -4034,12 +5570,26 @@ ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 
16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16: ; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: addi sp, sp, -16 +; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: slli a1, a1, 1 +; CHECK-ZVBB-NEXT: sub sp, sp, a1 +; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-ZVBB-NEXT: addi a1, sp, 16 +; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 1 @@ -4059,6 +5609,12 @@ ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 ; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: addi a0, sp, 16 +; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-ZVBB-NEXT: csrr a0, vlenb +; CHECK-ZVBB-NEXT: slli a0, a0, 1 +; CHECK-ZVBB-NEXT: add sp, sp, a0 +; CHECK-ZVBB-NEXT: addi sp, sp, 16 ; CHECK-ZVBB-NEXT: ret %v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl) ret %v @@ -4067,6 +5623,52 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_nxv64i16_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 2 ; RV32-NEXT: sub a2, a0, a1 @@ -4121,10 +5723,100 @@ ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded
Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_nxv64i16_unmasked: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 2 ; RV64-NEXT: sub a2, a0, a1 @@ -4179,6 +5871,50 @@ ; RV64-NEXT: vand.vx v8, v8, a4 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v24, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; 
RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bitreverse_nxv64i16_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB -; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB define <vscale x 1 x i16> @bswap_nxv1i16(<vscale x 1 x i16> %va) { ; CHECK-LABEL: bswap_nxv1i16: @@ -293,6 +293,52 @@ define <vscale x 16 x i32> @bswap_nxv16i32(<vscale x 16 x i32> %va) { ; RV32-LABEL: bswap_nxv16i32: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +;
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: lui a0, 16 @@ -305,10 +351,100 @@ ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bswap_nxv16i32: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi 
a0, a0, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 8 ; RV64-NEXT: lui a0, 16 @@ -321,6 +457,50 @@ ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv16i32: @@ -504,6 +684,27 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) @@ -536,6 +737,26 @@ ; RV32-NEXT: 
vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -588,9 +809,98 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: sub sp, sp, a0 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a0, 1044480 ; RV32-NEXT: sw a0, 8(sp) @@ -631,13 +941,189 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: 
csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: bswap_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; 
RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-NEXT: lui a0, 4080 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a0 @@ -667,6 +1153,91 @@ ; RV64-NEXT: vor.vv v8, v8, v0 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: bswap_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -605,6 +605,52 @@ define <vscale x 16 x i32> @vp_bswap_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL:
vp_bswap_nxv16i32: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV32-NEXT: lui a0, 16 @@ -617,10 +663,100 @@ ; RV32-NEXT: vsll.vi v8, v8, 24, v0.t ; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_nxv16i32: 
; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 8, v0.t ; RV64-NEXT: lui a0, 16 @@ -633,6 +769,50 @@ ; RV64-NEXT: vsll.vi v8, v8, 24, v0.t ; RV64-NEXT: vor.vv v8, v8, v24, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bswap_nxv16i32: @@ -647,6 
+827,52 @@ define <vscale x 16 x i32> @vp_bswap_nxv16i32_unmasked(<vscale x 16 x i32> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bswap_nxv16i32_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: lui a0, 16 @@ -659,10 +885,100 @@ ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +;
RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_nxv16i32_unmasked: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vsrl.vi v16, v8, 8 ; RV64-NEXT: lui a0, 16 @@ -675,6 +991,50 @@ ; RV64-NEXT: vsll.vi v8, v8, 24 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; 
RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bswap_nxv16i32_unmasked: @@ -1039,6 +1399,27 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) @@ -1073,6 +1454,26 @@ ; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: vor.vv v8, v20, v8, v0.t ; RV32-NEXT: vor.vv v8, v8, v12, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1123,6 +1524,27 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) @@ -1157,6 +1579,26 @@ ; RV32-NEXT: vor.vv v8, v20, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -1212,10 +1654,57 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 30 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 29 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 27 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 26 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 25 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) @@ -1289,8 +1778,55 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 30 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 29 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 28 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 27 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; 
RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 26 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 25 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1300,9 +1836,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1, v0.t @@ -1340,7 +1923,54 @@ ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; 
RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1360,29 +1990,118 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: lui a1, 1044480 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: li a1, 56 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: li a2, 40 -; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: lui a3, 16 -; RV32-NEXT: addi a3, a3, -256 -; RV32-NEXT: vand.vx v24, v24, a3 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 16 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: vand.vx v0, v24, a4 -; RV32-NEXT: vsrl.vi v16, v8, 8 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 22 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 21 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 20 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 19 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 18 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: add a1, a2, a1 +; 
RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 14 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 12 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 11 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 10 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: lui a1, 1044480 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: li a1, 56 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: li a2, 40 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: lui a3, 16 +; RV32-NEXT: addi a3, a3, -256 +; RV32-NEXT: vand.vx v24, v24, a3 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vand.vx v0, v24, a4 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 @@ -1404,13 +2123,189 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bswap_nxv7i64_unmasked: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li 
a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1 @@ -1440,6 +2335,91 @@ ; RV64-NEXT: vor.vv v8, v8, v0 ; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: 
csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_bswap_nxv7i64_unmasked: @@ -1461,10 +2441,57 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 30 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 29 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 27 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 26 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 25 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; 
RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: sw a1, 8(sp) @@ -1538,8 +2565,55 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 30 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 29 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 28 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 27 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 26 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 25 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -1549,9 +2623,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; 
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
@@ -1589,7 +2710,54 @@
 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1609,9 +2777,98 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 22
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 21
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 20
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 19
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 18
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 14
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 13
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 11
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: sw zero, 12(sp)
 ; RV32-NEXT: lui a1, 1044480
 ; RV32-NEXT: sw a1, 8(sp)
@@ -1653,13 +2910,189 @@
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v8, v8, v16
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 22
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 21
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 20
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 19
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 18
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 14
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 13
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 11
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bswap_nxv8i64_unmasked:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1
@@ -1689,6 +3122,91 @@
 ; RV64-NEXT: vor.vv v8, v8, v0
 ; RV64-NEXT: vor.vv v8, v8, v24
 ; RV64-NEXT: vor.vv v8, v16, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bswap_nxv8i64_unmasked:
@@ -1711,9 +3229,15 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -1753,12 +3277,26 @@
 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bswap_nxv64i16:
 ; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: addi sp, sp, -16
+; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: slli a1, a1, 1
+; CHECK-ZVBB-NEXT: sub sp, sp, a1
+; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-ZVBB-NEXT: addi a1, sp, 16
+; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
 ; CHECK-ZVBB-NEXT: srli a2, a1, 1
@@ -1778,6 +3316,12 @@
 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
 ; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: addi a0, sp, 16
+; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-ZVBB-NEXT: csrr a0, vlenb
+; CHECK-ZVBB-NEXT: slli a0, a0, 1
+; CHECK-ZVBB-NEXT: add sp, sp, a0
+; CHECK-ZVBB-NEXT: addi sp, sp, 16
 ; CHECK-ZVBB-NEXT: ret
   %v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
   ret <vscale x 64 x i16> %v
@@ -1786,6 +3330,52 @@
 define <vscale x 64 x i16> @vp_bswap_nxv64i16_unmasked(<vscale x 64 x i16> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_bswap_nxv64i16_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 2
 ; CHECK-NEXT: sub a2, a0, a1
@@ -1804,6 +3394,50 @@
 ; CHECK-NEXT: vsrl.vi v24, v8, 8
 ; CHECK-NEXT: vsll.vi v8, v8, 8
 ; CHECK-NEXT: vor.vv v8, v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_bswap_nxv64i16_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
--- a/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/callee-saved-regs.ll
@@ -7,15 +7,185 @@
 ; SPILL-O2: # %bb.0: # %entry
 ; SPILL-O2-NEXT: addi sp, sp, -16
 ; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: slli a0, a0, 4
 ; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT: addi a0, sp, 16
 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-NEXT: #APP
 ; SPILL-O2-NEXT: #NO_APP
 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
 ; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
 ; SPILL-O2-NEXT: add sp, sp, a0
 ; SPILL-O2-NEXT: addi sp, sp, 16
 ; SPILL-O2-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -236,9 +236,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 5
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 30
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 29
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 28
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 27
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 26
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 25
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 23
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 4
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
 ; CHECK-NEXT: add a1, sp, a1
@@ -270,7 +359,96 @@
 ; CHECK-NEXT: vadd.vx v8, v8, a4
 ; CHECK-NEXT: vadd.vx v16, v24, a4
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 30
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 29
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 28
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 27
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 23
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -297,10 +475,100 @@
 ; RV32-NEXT: addi s0, sp, 144
 ; RV32-NEXT: .cfi_def_cfa s0, 0
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: slli a1, a1, 5
 ; RV32-NEXT: sub sp, sp, a1
 ; RV32-NEXT: andi sp, sp, -128
 ; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a3, a1, 5
+; RV32-NEXT: sub a1, a3, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 30
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 29
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 28
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 27
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 26
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 25
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 24
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 23
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 22
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 21
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 20
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 19
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 18
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a3, a1, 4
+; RV32-NEXT: add a1, a3, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a3, a0, a1
 ; RV32-NEXT: vl8re32.v v24, (a3)
@@ -314,6 +582,96 @@
 ; RV32-NEXT: vmv8r.v v8, v0
 ; RV32-NEXT: vmv8r.v v16, v24
 ; RV32-NEXT: call ext2@plt
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 5
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 30
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 29
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 28
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 27
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 26
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 25
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 23
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 22
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 21
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 20
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 19
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 18
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: addi sp, s0, -144
 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
@@ -331,10 +689,100 @@
 ; RV64-NEXT: addi s0, sp, 144
 ; RV64-NEXT: .cfi_def_cfa s0, 0
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: slli a1, a1, 5
 ; RV64-NEXT: sub sp, sp, a1
 ; RV64-NEXT: andi sp, sp, -128
 ; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a3, a1, 5
+; RV64-NEXT: sub a1, a3, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 30
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 29
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 28
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 27
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 26
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 25
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 24
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 23
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 22
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 21
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 20
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 19
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 18
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a3, a1, 4
+; RV64-NEXT: add a1, a3, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a3, a0, a1
 ; RV64-NEXT: vl8re32.v v24, (a3)
@@ -348,6 +796,96 @@
 ; RV64-NEXT: vmv8r.v v8, v0
 ; RV64-NEXT: vmv8r.v v16, v24
 ; RV64-NEXT: call ext2@plt
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 5
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 30
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 29
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 28
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 27
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 26
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 25
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 23
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 22
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 21
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 20
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 19
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: addi sp, s0, -144
 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
 ; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
@@ -369,10 +907,99 @@
 ; RV32-NEXT: addi s0, sp, 144
 ; RV32-NEXT: .cfi_def_cfa s0, 0
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 48
+; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a3, a1, 6
+; RV32-NEXT: sub a1, a3, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 62
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 61
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 60
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 59
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 58
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 57
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 56
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 55
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 54
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 53
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 52
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 51
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 50
 ; RV32-NEXT: mul a1, a1, a3
-; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: andi sp, sp, -128
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a3, 49
+; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 128
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a3, a2, a1
@@ -420,6 +1047,96 @@
 ; RV32-NEXT: addi a1, a1, 128
 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV32-NEXT: call ext3@plt
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 6
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 62
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 61
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 60
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 59
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 58
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 57
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 55
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 54
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 53
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 52
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 51
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 50
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 49
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 128
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: addi sp, s0, -144
 ; RV32-NEXT: lw ra, 140(sp) # 4-byte Folded Reload
 ; RV32-NEXT: lw s0, 136(sp) # 4-byte Folded Reload
@@ -437,11 +1154,100 @@
 ; RV64-NEXT: addi s0, sp, 144
 ; RV64-NEXT: .cfi_def_cfa s0, 0
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: li a3, 48
-; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: slli a1, a1, 6
 ; RV64-NEXT: sub sp, sp, a1
 ; RV64-NEXT: andi sp, sp, -128
 ; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a3, a1, 6
+; RV64-NEXT: sub a1, a3, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 62
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 61
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 60
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 59
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 58
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 57
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 56
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 55
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 54
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 53
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 52
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 51
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 50
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a3, 49
+; RV64-NEXT: mul a1, a1, a3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 128
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a3, a2, a1
 ; RV64-NEXT: vl8re32.v v24, (a3)
@@ -488,6 +1294,96 @@
 ; RV64-NEXT: addi a1, a1, 128
 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: call ext3@plt
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 6
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 62
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 61
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 60
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 128
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 59 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 58 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 57 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 56 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 55 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 54 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 53 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 52 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 51 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 50 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 49 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 128 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV64-NEXT: addi sp, s0, -144 ; RV64-NEXT: ld ra, 136(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 128(sp) # 8-byte Folded Reload @@ -503,6 +1399,93 @@ define fastcc @vector_arg_indirect_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %x, %y, %z, i32 %8) { ; CHECK-LABEL: vector_arg_indirect_stack: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 
11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, t4, a0 @@ -511,6 +1494,91 @@ ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: vadd.vv v16, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %s = add %x, %z ret %s diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll @@ -6,6 +6,93 @@ define @callee_scalable_vector_split_indirect( %x, %y) { ; CHECK-LABEL: callee_scalable_vector_split_indirect: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size 
Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, a0, a1 @@ -14,6 +101,91 @@ ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: vadd.vv v16, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; 
CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = add %x, %y ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ceil-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.ceil.nxv1f16(, , i32) @@ -233,6 +233,52 @@ define @vp_ceil_vv_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: 
slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) @@ -248,6 +294,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv32f16( %va, %m, i32 %evl) ret %v @@ -459,6 +549,52 @@ define @vp_ceil_vv_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded 
Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -474,6 +610,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv16f32( %va, %m, i32 %evl) ret %v @@ -641,6 +821,52 @@ define @vp_ceil_vv_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) 
# Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) @@ -656,6 +882,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv7f64( %va, %m, i32 %evl) ret %v @@ -687,6 +957,52 @@ define @vp_ceil_vv_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; 
CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) @@ -702,6 +1018,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.ceil.nxv8f64( %va, %m, i32 %evl) ret %v @@ -737,9 +1097,68 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp 
+ 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 @@ -788,7 +1207,66 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -799,6 +1277,52 @@ define @vp_ceil_vv_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -828,6 +1352,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) 
# Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I -; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I -; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64 -; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB -; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I +; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB +; RUN: llc -mtriple=riscv64 
-mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB define @ctlz_nxv1i8( %va) { ; CHECK-ZVE64X-LABEL: ctlz_nxv1i8: @@ -1950,24 +1950,109 @@ define @ctlz_nxv8i64( %va) { ; RV32I-LABEL: ctlz_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 10 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 6 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: addi a0, sp, 48 +; RV32I-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 
-; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 44(sp) +; RV32I-NEXT: sw a0, 40(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 32(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v16 @@ -1983,9 +2068,9 @@ ; RV32I-NEXT: vsrl.vx v16, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v16 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: addi a0, sp, 40 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: addi a0, sp, 32 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 ; RV32I-NEXT: vand.vv v16, v0, v16 @@ -1994,9 +2079,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v24 ; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v0 @@ -2004,7 +2089,91 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 10 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 6 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: 
slli a1, a0, 2 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: addi a0, sp, 48 +; RV32I-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ctlz_nxv8i64: @@ -3943,24 +4112,109 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; RV32I-LABEL: ctlz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 10 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 6 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: 
add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: addi a0, sp, 48 +; RV32I-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 44(sp) +; RV32I-NEXT: sw a0, 40(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 32(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32I-NEXT: vsrl.vi v16, v8, 1 ; RV32I-NEXT: vor.vv v8, v8, v16 @@ -3976,9 +4230,9 @@ ; RV32I-NEXT: vsrl.vx v16, v8, a0 ; RV32I-NEXT: vor.vv v8, v8, v16 ; RV32I-NEXT: vnot.v v8, v8 -; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: addi a0, sp, 40 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: addi a0, sp, 32 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 ; RV32I-NEXT: vand.vv v16, v0, v16 @@ -3987,9 +4241,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v24 ; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v0 @@ -3997,7 +4251,91 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 
10
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 6
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: addi a0, sp, 48
+; RV32I-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: add sp, sp, a0
+; RV32I-NEXT: addi sp, sp, 48
 ; RV32I-NEXT: ret
 ;
 ; RV64I-LABEL: ctlz_zero_undef_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -1347,6 +1347,14 @@
 define <vscale x 16 x i64> @vp_ctlz_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_nxv16i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -1378,10 +1386,24 @@
 ; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
 ; CHECK-NEXT: vminu.vx v8, v8, a4, v0.t
 ; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64:
 ; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: addi sp, sp, -16
+; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: slli a1, a1, 1
+; CHECK-ZVBB-NEXT: sub sp, sp, a1
+; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-ZVBB-NEXT: addi a1, sp, 16
+; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
 ; CHECK-ZVBB-NEXT: srli a2, a1, 3
@@ -1400,6 +1422,12 @@
 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
 ; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: addi a0, sp, 16
+; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-ZVBB-NEXT: csrr a0, vlenb
+; CHECK-ZVBB-NEXT: slli a0, a0, 1
+; CHECK-ZVBB-NEXT: add sp, sp, a0
+; CHECK-ZVBB-NEXT: addi sp, sp, 16
 ; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i64> %v
@@ -2691,6 +2719,14 @@
 define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_ctlz_zero_undef_nxv16i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -2719,10 +2755,24 @@
 ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t
 ; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t
 ; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64:
 ; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: addi sp, sp, -16
+; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: slli a1, a1, 1
+; CHECK-ZVBB-NEXT: sub sp, sp, a1
+; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-ZVBB-NEXT: addi a1, sp, 16
+; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
 ; CHECK-ZVBB-NEXT: srli a2, a1, 3
@@ -2741,6 +2791,12 @@
 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
 ; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: addi a0, sp, 16
+; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-ZVBB-NEXT: csrr a0, vlenb
+; CHECK-ZVBB-NEXT: slli a0, a0, 1
+; CHECK-ZVBB-NEXT: add sp, sp, a0
+; CHECK-ZVBB-NEXT: addi sp, sp, 16
 ; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 define <vscale x 1 x i8> @ctpop_nxv1i8(<vscale x 1 x i8> %va) {
 ; CHECK-LABEL: ctpop_nxv1i8:
@@ -1132,28 +1132,113 @@
 define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32-LABEL: ctpop_nxv8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 14
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 13
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 11
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: lui a0, 349525 ; RV32-NEXT: addi a0, a0, 1365 -; RV32-NEXT: sw a0, 28(sp) -; RV32-NEXT: sw a0, 24(sp) +; RV32-NEXT: sw a0, 44(sp) +; RV32-NEXT: sw a0, 40(sp) ; RV32-NEXT: lui a0, 209715 ; RV32-NEXT: addi a0, a0, 819 -; RV32-NEXT: sw a0, 20(sp) -; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: sw a0, 36(sp) +; RV32-NEXT: sw a0, 32(sp) ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: addi a0, a0, -241 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 28(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: lui a0, 4112 ; RV32-NEXT: addi a0, a0, 257 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: sw a0, 0(sp) +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: addi a0, sp, 24 +; RV32-NEXT: addi a0, sp, 40 ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: addi a0, sp, 32 ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: vand.vv v16, v0, v16 @@ -1162,9 +1247,9 @@ ; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vadd.vv v8, v16, v8 -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: mv a0, sp +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vsrl.vi v0, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v0 @@ -1172,7 +1257,91 @@ ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; 
RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 48
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: ctpop_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -2353,33 +2353,77 @@
 define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv7i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 48
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a1, 349525
 ; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 44(sp)
+; RV32-NEXT: sw a1, 40(sp)
 ; RV32-NEXT: lui a1, 209715
 ; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a1, 32(sp)
 ; RV32-NEXT: lui a1, 61681
 ; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
 ; RV32-NEXT: lui a1, 4112
 ; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: addi a1, sp, 40
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v24, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: addi a1, sp, 32
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
@@ -2389,19 +2433,62 @@
 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: addi a1, sp, 24
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: addi a1, sp, 16
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 48
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv7i64:
@@ -2450,33 +2537,77 @@
 define <vscale x 7 x i64> @vp_ctpop_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv7i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT:
.cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2486,19 +2617,62 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli 
a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 48
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv7i64_unmasked:
@@ -2551,33 +2725,77 @@
 define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv8i64:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-; RV32-NEXT: .cfi_def_cfa_offset 32
+; RV32-NEXT: addi sp, sp, -48
+; RV32-NEXT: .cfi_def_cfa_offset 48
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 48
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a1, 349525
 ; RV32-NEXT: addi a1, a1, 1365
-; RV32-NEXT: sw a1, 28(sp)
-; RV32-NEXT: sw a1, 24(sp)
+; RV32-NEXT: sw a1, 44(sp)
+; RV32-NEXT: sw a1, 40(sp)
 ; RV32-NEXT: lui a1, 209715
 ; RV32-NEXT: addi a1, a1, 819
-; RV32-NEXT: sw a1, 20(sp)
-; RV32-NEXT: sw a1, 16(sp)
+; RV32-NEXT: sw a1, 36(sp)
+; RV32-NEXT: sw a1, 32(sp)
 ; RV32-NEXT: lui a1, 61681
 ; RV32-NEXT: addi a1, a1, -241
-; RV32-NEXT: sw a1, 12(sp)
-; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: sw a1, 28(sp)
+; RV32-NEXT: sw a1, 24(sp)
 ; RV32-NEXT: lui a1, 4112
 ; RV32-NEXT: addi a1, a1, 257
-; RV32-NEXT: sw a1, 4(sp)
-; RV32-NEXT: sw a1, 0(sp)
+; RV32-NEXT: sw a1, 20(sp)
+; RV32-NEXT: sw a1, 16(sp)
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT: addi a1, sp, 24
+; RV32-NEXT: addi a1, sp, 40
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v24, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: addi a1, sp, 32
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
@@ -2587,19 +2805,62 @@
 ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t
 ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT: addi a1, sp, 8
+; RV32-NEXT: addi a1, sp, 24
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: mv a1, sp
+; RV32-NEXT: addi a1, sp, 16
 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV32-NEXT: vlse64.v v16, (a1), zero
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT: addi sp, sp, 32
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 48
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 48
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 48
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv8i64:
@@ -2648,33 +2909,77 @@
 define <vscale x 8 x i64> @vp_ctpop_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv8i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -32
-;
RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2684,19 +2989,62 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 
-; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctpop_nxv8i64_unmasked: @@ -2752,10 +3100,16 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 58 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 58 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 48 ; RV32-NEXT: mul a1, a1, a2 @@ -2963,6 +3317,12 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 58 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -2972,9 +3332,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 18 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 @@ -3048,12 +3414,26 
@@
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64:
 ; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: addi sp, sp, -16
+; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16
+; CHECK-ZVBB-NEXT: csrr a1, vlenb
+; CHECK-ZVBB-NEXT: slli a1, a1, 1
+; CHECK-ZVBB-NEXT: sub sp, sp, a1
+; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-ZVBB-NEXT: addi a1, sp, 16
+; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0
 ; CHECK-ZVBB-NEXT: csrr a1, vlenb
 ; CHECK-ZVBB-NEXT: srli a2, a1, 3
@@ -3072,6 +3452,12 @@
 ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24
 ; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: addi a0, sp, 16
+; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-ZVBB-NEXT: csrr a0, vlenb
+; CHECK-ZVBB-NEXT: slli a0, a0, 1
+; CHECK-ZVBB-NEXT: add sp, sp, a0
+; CHECK-ZVBB-NEXT: addi sp, sp, 16
 ; CHECK-ZVBB-NEXT: ret
 %v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x i64> %v
@@ -3083,10 +3469,100 @@
 ; RV32-NEXT: addi sp, sp, -48
 ; RV32-NEXT: .cfi_def_cfa_offset 48
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 40
+; RV32-NEXT: li a2, 56
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 54
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 53
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 52
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 51
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 50
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 49
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 48
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 47
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 48
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded
Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 46 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 44 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 43 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 41 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 @@ -3203,8 +3679,98 @@ ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 54 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 53 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 52 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 51 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 50 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 49 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 47 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 46 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 45 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; 
RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 44 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 43 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 41 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -3217,6 +3783,52 @@ ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: .LBB47_2: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 3 +; RV64-NEXT: sub sp, sp, a3 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: sub a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 6 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 2 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 2 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 1 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 1 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV64-NEXT: addi a3, sp, 16 +; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 1 ; RV64-NEXT: lui a2, 349525 @@ -3264,6 +3876,50 @@ ; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vmul.vx v16, v16, a5 ; RV64-NEXT: vsrl.vx v16, v16, a6 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; 
RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
-; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
-; RUN: llc -mtriple=riscv32 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32,RV32F
-; RUN: llc -mtriple=riscv64 -mattr=+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64,RV64F
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32,RV32D
-; RUN: llc -mtriple=riscv64 -mattr=+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64,RV64D
-; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
-; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV32,RV32I
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-ZVE64X,RV64,RV64I
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV32,RV32F
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64f,+f -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-F,RV64,RV64F
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV32,RV32D
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-D,RV64,RV64D
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB
 
 define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
 ; CHECK-ZVE64X-LABEL: cttz_nxv1i8:
@@ -1958,32 +1958,117 @@
 define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv8i64:
 ; RV32I: # %bb.0:
-; RV32I-NEXT: addi sp, sp, -32
-; RV32I-NEXT: .cfi_def_cfa_offset 32
+; RV32I-NEXT: addi sp, sp, -48
+; RV32I-NEXT: .cfi_def_cfa_offset 48
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 4
+; RV32I-NEXT: sub sp, sp, a0
+; RV32I-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 14
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 13
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 12
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 11
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 10
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 3
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 3
+; RV32I-NEXT: sub a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: li a1, 6
+; RV32I-NEXT: mul a0, a0, a1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 2
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 2
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a1, a0, 1
+; RV32I-NEXT: add a0, a1, a0
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: slli a0, a0, 1
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: csrr a0, vlenb
+; RV32I-NEXT: add a0, sp, a0
+; RV32I-NEXT: addi a0, a0, 48
+; RV32I-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT: addi a0, sp, 48
+; RV32I-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV32I-NEXT: lui a0, 349525
 ; RV32I-NEXT:
addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 44(sp) +; RV32I-NEXT: sw a0, 40(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 32(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32I-NEXT: vsub.vx v16, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: addi a0, sp, 40 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: addi a0, sp, 32 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 ; RV32I-NEXT: vand.vv v16, v0, v16 @@ -1992,9 +2077,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v24 ; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v0 @@ -2002,7 +2087,91 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 10 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 6 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: add a0, a1, a0 +; 
RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: addi a0, sp, 48 +; RV32I-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_nxv8i64: @@ -2045,6 +2214,52 @@ ; ; RV32F-LABEL: cttz_nxv8i64: ; RV32F: # %bb.0: +; RV32F-NEXT: addi sp, sp, -16 +; RV32F-NEXT: .cfi_def_cfa_offset 16 +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a0, a0, 3 +; RV32F-NEXT: sub sp, sp, a0 +; RV32F-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a1, a0, 3 +; RV32F-NEXT: sub a0, a1, a0 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: li a1, 6 +; RV32F-NEXT: mul a0, a0, a1 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a1, a0, 2 +; RV32F-NEXT: add a0, a1, a0 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a0, a0, 2 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a1, a0, 1 +; RV32F-NEXT: add a0, a1, a0 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a0, a0, 1 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32F-NEXT: addi a0, sp, 16 +; RV32F-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32F-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV32F-NEXT: vrsub.vi v16, v8, 0 ; RV32F-NEXT: vand.vv v16, v8, v16 @@ -2060,10 +2275,100 @@ ; RV32F-NEXT: li a1, 64 ; RV32F-NEXT: vmerge.vxm v8, v16, a1, v0 ; RV32F-NEXT: fsrm a0 +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a1, a0, 3 +; RV32F-NEXT: sub a0, a1, a0 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: li a1, 6 +; RV32F-NEXT: mul a0, a0, a1 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v25, (a0) # Unknown-size 
Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a1, a0, 2 +; RV32F-NEXT: add a0, a1, a0 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a0, a0, 2 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a1, a0, 1 +; RV32F-NEXT: add a0, a1, a0 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a0, a0, 1 +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: add a0, sp, a0 +; RV32F-NEXT: addi a0, a0, 16 +; RV32F-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: addi a0, sp, 16 +; RV32F-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32F-NEXT: csrr a0, vlenb +; RV32F-NEXT: slli a0, a0, 3 +; RV32F-NEXT: add sp, sp, a0 +; RV32F-NEXT: addi sp, sp, 16 ; RV32F-NEXT: ret ; ; RV64F-LABEL: cttz_nxv8i64: ; RV64F: # %bb.0: +; RV64F-NEXT: addi sp, sp, -16 +; RV64F-NEXT: .cfi_def_cfa_offset 16 +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a0, a0, 3 +; RV64F-NEXT: sub sp, sp, a0 +; RV64F-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a1, a0, 3 +; RV64F-NEXT: sub a0, a1, a0 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: li a1, 6 +; RV64F-NEXT: mul a0, a0, a1 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a1, a0, 2 +; RV64F-NEXT: add a0, a1, a0 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a0, a0, 2 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a1, a0, 1 +; RV64F-NEXT: add a0, a1, a0 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a0, a0, 1 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64F-NEXT: addi a0, sp, 16 +; RV64F-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64F-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; RV64F-NEXT: vrsub.vi v16, v8, 0 ; RV64F-NEXT: vand.vv v16, v8, v16 @@ -2079,6 +2384,50 @@ ; RV64F-NEXT: li a1, 64 ; RV64F-NEXT: vmerge.vxm v8, v16, a1, v0 ; RV64F-NEXT: fsrm a0 +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a1, a0, 3 +; RV64F-NEXT: sub a0, a1, a0 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: li a1, 6 +; RV64F-NEXT: mul a0, a0, a1 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 
16 +; RV64F-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a1, a0, 2 +; RV64F-NEXT: add a0, a1, a0 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a0, a0, 2 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a1, a0, 1 +; RV64F-NEXT: add a0, a1, a0 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a0, a0, 1 +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: add a0, sp, a0 +; RV64F-NEXT: addi a0, a0, 16 +; RV64F-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: addi a0, sp, 16 +; RV64F-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64F-NEXT: csrr a0, vlenb +; RV64F-NEXT: slli a0, a0, 3 +; RV64F-NEXT: add sp, sp, a0 +; RV64F-NEXT: addi sp, sp, 16 ; RV64F-NEXT: ret ; ; RV32D-LABEL: cttz_nxv8i64: @@ -3848,32 +4197,117 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) { ; RV32I-LABEL: cttz_zero_undef_nxv8i64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi sp, sp, -32 -; RV32I-NEXT: .cfi_def_cfa_offset 32 +; RV32I-NEXT: addi sp, sp, -48 +; RV32I-NEXT: .cfi_def_cfa_offset 48 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: sub sp, sp, a0 +; RV32I-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 10 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 6 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v
v25, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32I-NEXT: addi a0, sp, 48 +; RV32I-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32I-NEXT: lui a0, 349525 ; RV32I-NEXT: addi a0, a0, 1365 -; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a0, 44(sp) +; RV32I-NEXT: sw a0, 40(sp) ; RV32I-NEXT: lui a0, 209715 ; RV32I-NEXT: addi a0, a0, 819 -; RV32I-NEXT: sw a0, 20(sp) -; RV32I-NEXT: sw a0, 16(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: sw a0, 32(sp) ; RV32I-NEXT: lui a0, 61681 ; RV32I-NEXT: addi a0, a0, -241 -; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a0, 28(sp) +; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: lui a0, 4112 ; RV32I-NEXT: addi a0, a0, 257 -; RV32I-NEXT: sw a0, 4(sp) -; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a0, 20(sp) +; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: li a0, 1 ; RV32I-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32I-NEXT: vsub.vx v16, v8, a0 ; RV32I-NEXT: vnot.v v8, v8 ; RV32I-NEXT: vand.vv v8, v8, v16 -; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: addi a0, sp, 40 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: addi a0, sp, 16 +; RV32I-NEXT: addi a0, sp, 32 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 1 ; RV32I-NEXT: vand.vv v16, v0, v16 @@ -3882,9 +4316,9 @@ ; RV32I-NEXT: vsrl.vi v8, v8, 2 ; RV32I-NEXT: vand.vv v8, v8, v24 ; RV32I-NEXT: vadd.vv v8, v16, v8 -; RV32I-NEXT: addi a0, sp, 8 +; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: vlse64.v v16, (a0), zero -; RV32I-NEXT: mv a0, sp +; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: vlse64.v v24, (a0), zero ; RV32I-NEXT: vsrl.vi v0, v8, 4 ; RV32I-NEXT: vadd.vv v8, v8, v0 @@ -3892,7 +4326,91 @@ ; RV32I-NEXT: vmul.vv v8, v8, v24 ; RV32I-NEXT: li a0, 56 ; RV32I-NEXT: vsrl.vx v8, v8, a0 -; RV32I-NEXT: addi sp, sp, 32 +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 14 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 13 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 12 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 11 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; 
RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 10 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 3 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 3 +; RV32I-NEXT: sub a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: li a1, 6 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 2 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a1, a0, 1 +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 1 +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: add a0, sp, a0 +; RV32I-NEXT: addi a0, a0, 48 +; RV32I-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: addi a0, sp, 48 +; RV32I-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32I-NEXT: csrr a0, vlenb +; RV32I-NEXT: slli a0, a0, 4 +; RV32I-NEXT: add sp, sp, a0 +; RV32I-NEXT: addi sp, sp, 48 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cttz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -2633,37 +2633,81 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv7i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli
a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2673,19 +2717,62 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv7i64: @@ -2738,37 +2825,81 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv7i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2778,19 +2909,62 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv7i64_unmasked: @@ -2847,37 +3021,81 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv8i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +;
RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24, v0.t ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2887,19 +3105,62 @@ ; RV32-NEXT: vadd.vv v8, v24, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli 
a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv8i64: @@ -2952,37 +3213,81 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_nxv8i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -32 -; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 32(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 12(sp) -; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: lui
a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 4(sp) -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 ; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: addi a1, sp, 32 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2992,19 +3297,62 @@ ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 ; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: mv a1, sp +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 -; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv8i64_unmasked: @@ -3064,10 +3412,64 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 50 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 50 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size 
Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 47 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 46 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 44 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 43 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 41 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 ; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a2, a1, 3 @@ -3240,8 +3642,62 @@ ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 47 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 46 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 45 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 44 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 43 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 41 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 50 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret @@ -3251,9 +3707,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 18 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 @@ -3334,12 +3796,26 @@ ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64: ; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: addi sp, sp, -16 +; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: slli a1, a1, 1 +; CHECK-ZVBB-NEXT: sub sp, sp, a1 +; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-ZVBB-NEXT: addi a1, sp, 16 +; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 3 @@ -3358,6 +3834,12 @@ ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: addi a0, sp, 16 +; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-ZVBB-NEXT: csrr a0, vlenb +; CHECK-ZVBB-NEXT: slli a0, a0, 1 +; CHECK-ZVBB-NEXT: add sp, sp, a0 +; CHECK-ZVBB-NEXT: addi sp, sp, 16 ; CHECK-ZVBB-NEXT: ret %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 false, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x i64> %v @@ -3369,9 +3851,99 @@ ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 46 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +;
RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 44 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 43 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 41 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 39 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 38 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 37 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 36 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 35 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 34 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 ; RV32-NEXT: sw a1, 44(sp) @@ -3484,13 +4056,149 @@ ; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 46 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 45 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 44 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 43 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v4, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 41 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 39 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 38 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 37 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 36 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 35 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 34 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 48 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_nxv16i64_unmasked: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: 
slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: sub a2, a0, a1 ; RV64-NEXT: sltu a3, a0, a2 @@ -3550,6 +4258,50 @@ ; RV64-NEXT: vand.vx v8, v8, a5 ; RV64-NEXT: vmul.vx v8, v8, a6 ; RV64-NEXT: vsrl.vx v8, v8, a7 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked: @@ -4868,9 +5620,15 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -4919,12 +5677,26 @@ ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, 
sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64: ; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: addi sp, sp, -16 +; CHECK-ZVBB-NEXT: .cfi_def_cfa_offset 16 +; CHECK-ZVBB-NEXT: csrr a1, vlenb +; CHECK-ZVBB-NEXT: slli a1, a1, 1 +; CHECK-ZVBB-NEXT: sub sp, sp, a1 +; CHECK-ZVBB-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-ZVBB-NEXT: addi a1, sp, 16 +; CHECK-ZVBB-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-ZVBB-NEXT: vmv1r.v v24, v0 ; CHECK-ZVBB-NEXT: csrr a1, vlenb ; CHECK-ZVBB-NEXT: srli a2, a1, 3 @@ -4943,6 +5715,12 @@ ; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-ZVBB-NEXT: vmv1r.v v0, v24 ; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t +; CHECK-ZVBB-NEXT: addi a0, sp, 16 +; CHECK-ZVBB-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-ZVBB-NEXT: csrr a0, vlenb +; CHECK-ZVBB-NEXT: slli a0, a0, 1 +; CHECK-ZVBB-NEXT: add sp, sp, a0 +; CHECK-ZVBB-NEXT: addi sp, sp, 16 ; CHECK-ZVBB-NEXT: ret %v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl) ret <vscale x 16 x i64> %v @@ -4951,6 +5729,52 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x i64> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: sltu a3, a0, a2 @@ -4978,6 +5802,50 @@ ; CHECK-NEXT: vsrl.vx v8, v8, a2 ; CHECK-NEXT: vsub.vx v8, v8, a3 ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +;
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i64_unmasked: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll @@ -451,6 +451,58 @@ define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_abs_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size 
Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v1, v0, 2 @@ -470,6 +522,56 @@ ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vrsub.vi v24, v16, 0, v0.t ; CHECK-NEXT: vmax.vv v16, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.abs.v32i64(<32 x i64> %va, i1 false, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -484,6 +586,52 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB35_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: 
add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v24, v8, 0 ; CHECK-NEXT: vmax.vv v8, v8, v24 @@ -494,6 +642,50 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrsub.vi v24, v16, 0 ; CHECK-NEXT: vmax.vv v16, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll @@ -1,11 +1,98 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s --check-prefix=VLEN512 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=256 < %s | FileCheck %s --check-prefix=VLEN256 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=512 < %s | FileCheck %s 
--check-prefix=VLEN512 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs -riscv-v-vector-bits-min=1024 < %s | FileCheck %s --check-prefix=VLEN1024 define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) { ; VLEN256-LABEL: bitcast_1024B: ; VLEN256: # %bb.0: +; VLEN256-NEXT: addi sp, sp, -16 +; VLEN256-NEXT: .cfi_def_cfa_offset 16 +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a1, a1, 4 +; VLEN256-NEXT: sub sp, sp, a1 +; VLEN256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: li a2, 14 +; VLEN256-NEXT: mul a1, a1, a2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: li a2, 13 +; VLEN256-NEXT: mul a1, a1, a2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: li a2, 12 +; VLEN256-NEXT: mul a1, a1, a2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: li a2, 11 +; VLEN256-NEXT: mul a1, a1, a2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: li a2, 10 +; VLEN256-NEXT: mul a1, a1, a2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a2, a1, 3 +; VLEN256-NEXT: add a1, a2, a1 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a1, a1, 3 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a2, a1, 3 +; VLEN256-NEXT: sub a1, a2, a1 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: li a2, 6 +; VLEN256-NEXT: mul a1, a1, a2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a2, a1, 2 +; VLEN256-NEXT: add a1, a2, a1 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a1, a1, 2 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a2, a1, 1 +; VLEN256-NEXT: add a1, a2, a1 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: slli a1, a1, 1 +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: csrr a1, vlenb +; VLEN256-NEXT: add a1, sp, a1 +; VLEN256-NEXT: addi a1, a1, 16 +; VLEN256-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; VLEN256-NEXT: addi a1, sp, 16 +; 
VLEN256-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; VLEN256-NEXT: addi a1, a0, 256 ; VLEN256-NEXT: li a2, 256 ; VLEN256-NEXT: vsetvli zero, a2, e8, m8, ta, ma @@ -13,6 +100,91 @@ ; VLEN256-NEXT: vle8.v v0, (a1) ; VLEN256-NEXT: vadd.vv v8, v24, v8 ; VLEN256-NEXT: vadd.vv v16, v0, v16 +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: li a1, 14 +; VLEN256-NEXT: mul a0, a0, a1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: li a1, 13 +; VLEN256-NEXT: mul a0, a0, a1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: li a1, 12 +; VLEN256-NEXT: mul a0, a0, a1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: li a1, 11 +; VLEN256-NEXT: mul a0, a0, a1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: li a1, 10 +; VLEN256-NEXT: mul a0, a0, a1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a1, a0, 3 +; VLEN256-NEXT: add a0, a1, a0 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a0, a0, 3 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a1, a0, 3 +; VLEN256-NEXT: sub a0, a1, a0 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: li a1, 6 +; VLEN256-NEXT: mul a0, a0, a1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a1, a0, 2 +; VLEN256-NEXT: add a0, a1, a0 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a0, a0, 2 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a1, a0, 1 +; VLEN256-NEXT: add a0, a1, a0 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a0, a0, 1 +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: add a0, sp, a0 +; VLEN256-NEXT: addi a0, a0, 16 +; VLEN256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: addi a0, sp, 16 +; VLEN256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; VLEN256-NEXT: csrr a0, vlenb +; VLEN256-NEXT: slli a0, a0, 4 +; VLEN256-NEXT: add sp, sp, a0 +; VLEN256-NEXT: addi sp, sp, 16 ; VLEN256-NEXT: ret ; ; VLEN512-LABEL: bitcast_1024B: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -1925,6 +1925,52 @@ define <8 x i64> @vp_bitreverse_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v8i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma @@ -1993,6 +2039,50 @@ ; RV32-NEXT: vand.vv v8, v8, v20, v0.t ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v8i64: @@ -2061,6 +2151,29 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_bitreverse_v8i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vx v12, v8, a1 @@ -2127,6 +2240,27 @@ ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v12, v8 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_bitreverse_v8i64_unmasked: @@ -2202,9 +2336,63 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 39 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 38 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # 
Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 37 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 36 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 35 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 34 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2336,7 +2524,61 @@ ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 39 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 38 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 37 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 36 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 35 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 34 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2346,9 +2588,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; 
RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1, v0.t @@ -2413,7 +2702,54 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: 
add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2427,9 +2763,98 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 22 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 21 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 20 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 19 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 18 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 14 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 12 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 11 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 10 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; 
RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a1 @@ -2504,21 +2929,197 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; -; RV64-LABEL: vp_bitreverse_v15i64_unmasked: -; RV64: # %bb.0: -; RV64-NEXT: lui a1, 4080 -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV64-NEXT: vand.vx v16, v8, a1 -; RV64-NEXT: vsll.vi v16, v16, 24 -; RV64-NEXT: li a0, 255 -; RV64-NEXT: slli a0, a0, 24 -; RV64-NEXT: vand.vx v24, v8, a0 -; RV64-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: vp_bitreverse_v15i64_unmasked: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 4080 +; RV64-NEXT: vsetvli zero, 
a0, e64, m8, ta, ma +; RV64-NEXT: vand.vx v16, v8, a1 +; RV64-NEXT: vsll.vi v16, v16, 24 +; RV64-NEXT: li a0, 255 +; RV64-NEXT: slli a0, a0, 24 +; RV64-NEXT: vand.vx v24, v8, a0 +; RV64-NEXT: vsll.vi v24, v24, 8 ; RV64-NEXT: vor.vv v16, v16, v24 ; RV64-NEXT: li a2, 56 ; RV64-NEXT: vsll.vx v24, v8, a2 @@ -2567,6 +3168,91 @@ ; RV64-NEXT: vand.vx v8, v8, a0 ; RV64-NEXT: vadd.vv v8, v8, v8 ; RV64-NEXT: vor.vv v8, v16, v8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -2582,9 +3268,63 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: 
.cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 39 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 38 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 37 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 36 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 35 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 34 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 5 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 5 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: li a3, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2716,7 +3456,61 @@ ; RV32-NEXT: vsll.vi v8, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 39 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 38 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 37 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 36 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 35 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 
16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 34 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 5 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -2726,9 +3520,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 4 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 14 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 13 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 12 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 11 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 10 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4080 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vand.vx v16, v8, a1, v0.t @@ -2793,7 +3634,54 @@ ; RV64-NEXT: vsll.vi v8, v8, 1, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul 
a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2807,9 +3695,98 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 22 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 21 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 20 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 19 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 18 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 4 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 14 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 13 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size 
Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 12 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 11 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 10 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a1 @@ -2884,13 +3861,189 @@ ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 22 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 21 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 20 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 19 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 18 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: 
csrr a0, vlenb
+; RV32-NEXT: li a1, 11
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bitreverse_v16i64_unmasked:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1
@@ -2947,6 +4100,91 @@
 ; RV64-NEXT: vand.vx v8, v8, a0
 ; RV64-NEXT: vadd.vv v8, v8, v8
 ; RV64-NEXT: vor.vv v8, v16, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <16 x i1> poison, i1 true, i32 0
 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -2962,9 +4200,15 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: li a2, 18
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: slli a1, a1, 3
 ; RV32-NEXT: add a1, sp, a1
@@ -3038,6 +4282,12 @@
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 18
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -3047,9 +4297,15 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: li a2, 18
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a1, sp, a1
@@ -3123,6 +4379,12 @@
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -3139,6 +4401,52 @@
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a1, 64
 ; RV32-NEXT: .LBB35_2:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: sub a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 6
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v24, v8, 8
 ; RV32-NEXT: vsll.vi v8, v8, 8
@@ -3187,6 +4495,50 @@
 ; RV32-NEXT: vand.vx v16, v16, a3
 ; RV32-NEXT: vadd.vv v16, v16, v16
 ; RV32-NEXT: vor.vv v16, v24, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bitreverse_v128i16_unmasked:
@@ -3197,6 +4549,52 @@
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: li a1, 64
 ; RV64-NEXT: .LBB35_2:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; RV64-NEXT: vsrl.vi v24, v8, 8
 ; RV64-NEXT: vsll.vi v8, v8, 8
@@ -3245,6 +4643,50 @@
 ; RV64-NEXT: vand.vx v16, v16, a3
 ; RV64-NEXT: vadd.vv v16, v16, v16
 ; RV64-NEXT: vor.vv v16, v24, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <128 x i1> poison, i1 true, i32 0
 %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -737,6 +737,52 @@
 define <8 x i64> @vp_bswap_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bswap_v8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v12, v0
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
@@ -775,6 +821,50 @@
 ; RV32-NEXT: vor.vv v8, v28, v8, v0.t
 ; RV32-NEXT: vor.vv v8, v24, v8, v0.t
 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bswap_v8i64:
@@ -816,6 +906,29 @@
 define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_bswap_v8i64_unmasked:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
 ; RV32-NEXT: vsrl.vx v12, v8, a1
@@ -852,6 +965,27 @@
 ; RV32-NEXT: vor.vv v8, v20, v8
 ; RV32-NEXT: vor.vv v8, v16, v8
 ; RV32-NEXT: vor.vv v8, v8, v12
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bswap_v8i64_unmasked:
@@ -900,9 +1034,63 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 42
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 39
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 38
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 37
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 36
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 35
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 34
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
@@ -1006,7 +1194,61 @@
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 39
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 38
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 37
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 36
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 35
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 34
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 5
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 42
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -1016,9 +1258,56 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: slli a1, a1, 4
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
@@ -1056,7 +1345,54 @@
 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1070,9 +1406,98 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 22
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 21
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 20
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 19
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 18
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 14
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 13
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 11
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vx v16, v8, a1
@@ -1119,13 +1544,189 @@
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v8, v8, v16
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 22
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 21
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 20
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 19
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 18
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 14
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 13
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 11
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bswap_v15i64_unmasked:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1
@@ -1155,6 +1756,91 @@
 ; RV64-NEXT: vor.vv v8, v8, v0
 ; RV64-NEXT: vor.vv v8, v8, v24
 ; RV64-NEXT: vor.vv v8, v16, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <15 x i1> poison, i1 true, i32 0
 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -1170,9 +1856,63 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 42
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x2a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 42 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 39
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 38
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 37
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 36
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 35
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 34
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
@@ -1276,7 +2016,61 @@
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 39
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 38
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 37
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 36
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 35
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 34
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 5
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 42
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -1286,9 +2080,56 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: slli a1, a1, 4
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 4
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1, v0.t
@@ -1326,7 +2167,54 @@
 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: vor.vv v8, v16, v8, v0.t
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1340,9 +2228,98 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: li a2, 24
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 22
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 21
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 20
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 19
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 18
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 4
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 14
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 13
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 12
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 11
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vx v16, v8, a1
@@ -1389,13 +2366,189 @@
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: vor.vv v8, v8, v16
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 22
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 21
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 20
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 19
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 18
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 4
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 14
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 13
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 11
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 24
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_bswap_v16i64_unmasked:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 14
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 13
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 12
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 11
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 10
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a1, 4080
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vand.vx v16, v8, a1
@@ -1425,6 +2578,91 @@
 ; RV64-NEXT: vor.vv v8, v8, v0
 ; RV64-NEXT: vor.vv v8, v8, v24
 ; RV64-NEXT: vor.vv v8, v16, v8
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <16 x i1> poison, i1 true, i32 0
 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -1440,9 +2678,15 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
 ; CHECK-NEXT: add a1, sp, a1
@@ -1480,6 +2724,12 @@
 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp,
sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1496,6 +2746,52 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vsrl.vi v24, v8, 8 ; CHECK-NEXT: vsll.vi v8, v8, 8 @@ -1508,6 +2804,50 @@ ; CHECK-NEXT: vsrl.vi v24, v16, 8 ; CHECK-NEXT: vsll.vi v16, v16, 8 ; CHECK-NEXT: vor.vv v16, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <128 x i1> poison, i1 true, i32 0 %m = shufflevector <128 x i1> %head, <128 x i1> poison, <128 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 define fastcc <4 x i8> @ret_v4i8(ptr %p) { ; CHECK-LABEL: ret_v4i8: @@ -211,16 +211,152 @@ define fastcc <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { ; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: ; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: addi sp, sp, -16 +; LMULMAX8-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 3 +; LMULMAX8-NEXT: sub sp, sp, a2 +; LMULMAX8-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 3 +; LMULMAX8-NEXT: sub a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: li a3, 6 +; LMULMAX8-NEXT: mul a2, a2, a3 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 2 +; LMULMAX8-NEXT: add a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 1 +; LMULMAX8-NEXT: add a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 1 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: addi a2, sp, 16 +; LMULMAX8-NEXT: 
vs1r.v v31, (a2) # Unknown-size Folded Spill ; LMULMAX8-NEXT: li a2, 32 ; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; LMULMAX8-NEXT: vle32.v v24, (a0) ; LMULMAX8-NEXT: vadd.vv v8, v8, v16 ; LMULMAX8-NEXT: vadd.vv v8, v8, v24 ; LMULMAX8-NEXT: vadd.vx v8, v8, a1 +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 3 +; LMULMAX8-NEXT: sub a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: li a1, 6 +; LMULMAX8-NEXT: mul a0, a0, a1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 2 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 2 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 1 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: addi a0, sp, 16 +; LMULMAX8-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 3 +; LMULMAX8-NEXT: add sp, sp, a0 +; LMULMAX8-NEXT: addi sp, sp, 16 ; LMULMAX8-NEXT: ret ; ; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: ; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: addi sp, sp, -16 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 3 +; LMULMAX4-NEXT: sub sp, sp, a1 +; LMULMAX4-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 3 +; LMULMAX4-NEXT: sub a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: li a3, 6 +; LMULMAX4-NEXT: mul a1, a1, a3 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 2 +; LMULMAX4-NEXT: add a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 2 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 1 +; LMULMAX4-NEXT: add a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli 
a1, a1, 1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: addi a1, sp, 16 +; LMULMAX4-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: addi a1, a0, 64 ; LMULMAX4-NEXT: vle32.v v24, (a1) @@ -231,6 +367,50 @@ ; LMULMAX4-NEXT: vadd.vv v8, v8, v28 ; LMULMAX4-NEXT: vadd.vx v8, v8, a2 ; LMULMAX4-NEXT: vadd.vx v12, v12, a2 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 3 +; LMULMAX4-NEXT: sub a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: li a1, 6 +; LMULMAX4-NEXT: mul a0, a0, a1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 2 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 2 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 1 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: addi a0, sp, 16 +; LMULMAX4-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: add sp, sp, a0 +; LMULMAX4-NEXT: addi sp, sp, 16 ; LMULMAX4-NEXT: ret %r = add <32 x i32> %x, %y %s = add <32 x i32> %r, %z @@ -246,25 +426,156 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) { ; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32: ; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -16 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX8-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LMULMAX8-NEXT: addi sp, sp, -48 +; LMULMAX8-NEXT: .cfi_def_cfa_offset 48 +; LMULMAX8-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; LMULMAX8-NEXT: .cfi_offset ra, -8 +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a1, a1, 3 +; LMULMAX8-NEXT: sub sp, sp, a1 +; LMULMAX8-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a2, a1, 3 +; LMULMAX8-NEXT: sub a1, a2, a1 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; LMULMAX8-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: li a2, 6 +; LMULMAX8-NEXT: mul a1, a1, a2 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; 
LMULMAX8-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a2, a1, 2 +; LMULMAX8-NEXT: add a1, a2, a1 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; LMULMAX8-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a1, a1, 2 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; LMULMAX8-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a2, a1, 1 +; LMULMAX8-NEXT: add a1, a2, a1 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; LMULMAX8-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a1, a1, 1 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; LMULMAX8-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 32 +; LMULMAX8-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; LMULMAX8-NEXT: addi a1, sp, 32 +; LMULMAX8-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; LMULMAX8-NEXT: vmv8r.v v24, v8 ; LMULMAX8-NEXT: li a1, 2 ; LMULMAX8-NEXT: vmv8r.v v8, v16 ; LMULMAX8-NEXT: vmv8r.v v16, v24 ; LMULMAX8-NEXT: call ext2@plt -; LMULMAX8-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 16 +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 3 +; LMULMAX8-NEXT: sub a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: li a1, 6 +; LMULMAX8-NEXT: mul a0, a0, a1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 2 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 2 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 1 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 32 +; LMULMAX8-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: addi a0, sp, 32 +; LMULMAX8-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 3 +; LMULMAX8-NEXT: add sp, sp, a0 +; LMULMAX8-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; LMULMAX8-NEXT: addi sp, sp, 48 ; LMULMAX8-NEXT: ret ; ; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -16 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 16 -; LMULMAX4-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: addi sp, sp, -48 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 48 +; LMULMAX4-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; LMULMAX4-NEXT: 
.cfi_offset ra, -8 +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 3 +; LMULMAX4-NEXT: sub sp, sp, a1 +; LMULMAX4-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a2, a1, 3 +; LMULMAX4-NEXT: sub a1, a2, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: li a2, 6 +; LMULMAX4-NEXT: mul a1, a1, a2 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a2, a1, 2 +; LMULMAX4-NEXT: add a1, a2, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 2 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a2, a1, 1 +; LMULMAX4-NEXT: add a1, a2, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 32 +; LMULMAX4-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: addi a1, sp, 32 +; LMULMAX4-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; LMULMAX4-NEXT: vmv4r.v v24, v12 ; LMULMAX4-NEXT: vmv4r.v v28, v8 ; LMULMAX4-NEXT: li a1, 2 @@ -273,8 +584,51 @@ ; LMULMAX4-NEXT: vmv4r.v v16, v28 ; LMULMAX4-NEXT: vmv4r.v v20, v24 ; LMULMAX4-NEXT: call ext2@plt -; LMULMAX4-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 16 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 3 +; LMULMAX4-NEXT: sub a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: li a1, 6 +; LMULMAX4-NEXT: mul a0, a0, a1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 2 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 2 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 1 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 32 +; LMULMAX4-NEXT: vl1r.v 
v30, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: addi a0, sp, 32 +; LMULMAX4-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: add sp, sp, a0 +; LMULMAX4-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: addi sp, sp, 48 ; LMULMAX4-NEXT: ret %t = call fastcc <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2) ret <32 x i32> %t @@ -283,58 +637,224 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { ; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX8: # %bb.0: -; LMULMAX8-NEXT: addi sp, sp, -256 -; LMULMAX8-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX8-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX8-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX8-NEXT: addi sp, sp, -384 +; LMULMAX8-NEXT: .cfi_def_cfa_offset 384 +; LMULMAX8-NEXT: sd ra, 376(sp) # 8-byte Folded Spill +; LMULMAX8-NEXT: sd s0, 368(sp) # 8-byte Folded Spill ; LMULMAX8-NEXT: .cfi_offset ra, -8 ; LMULMAX8-NEXT: .cfi_offset s0, -16 -; LMULMAX8-NEXT: addi s0, sp, 256 +; LMULMAX8-NEXT: addi s0, sp, 384 ; LMULMAX8-NEXT: .cfi_def_cfa s0, 0 +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 3 +; LMULMAX8-NEXT: sub sp, sp, a2 ; LMULMAX8-NEXT: andi sp, sp, -128 +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 3 +; LMULMAX8-NEXT: sub a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: li a3, 6 +; LMULMAX8-NEXT: mul a2, a2, a3 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 2 +; LMULMAX8-NEXT: add a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 1 +; LMULMAX8-NEXT: add a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 1 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 368 +; LMULMAX8-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: addi a2, sp, 368 +; LMULMAX8-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; LMULMAX8-NEXT: li a2, 32 ; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; LMULMAX8-NEXT: vle32.v v24, (a0) -; LMULMAX8-NEXT: mv a3, sp -; LMULMAX8-NEXT: mv a0, sp +; LMULMAX8-NEXT: addi a3, sp, 128 +; LMULMAX8-NEXT: addi a0, sp, 128 ; LMULMAX8-NEXT: li a2, 42 ; LMULMAX8-NEXT: vse32.v v8, (a3) ; LMULMAX8-NEXT: vmv.v.v v8, v24 ; LMULMAX8-NEXT: call ext3@plt -; LMULMAX8-NEXT: addi sp, s0, -256 -; LMULMAX8-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX8-NEXT: addi sp, sp, 256 +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 3 +; 
LMULMAX8-NEXT: sub a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: li a1, 6 +; LMULMAX8-NEXT: mul a0, a0, a1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 2 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 2 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 1 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 368 +; LMULMAX8-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: addi a0, sp, 368 +; LMULMAX8-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: addi sp, s0, -384 +; LMULMAX8-NEXT: ld ra, 376(sp) # 8-byte Folded Reload +; LMULMAX8-NEXT: ld s0, 368(sp) # 8-byte Folded Reload +; LMULMAX8-NEXT: addi sp, sp, 384 ; LMULMAX8-NEXT: ret ; ; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32: ; LMULMAX4: # %bb.0: -; LMULMAX4-NEXT: addi sp, sp, -256 -; LMULMAX4-NEXT: .cfi_def_cfa_offset 256 -; LMULMAX4-NEXT: sd ra, 248(sp) # 8-byte Folded Spill -; LMULMAX4-NEXT: sd s0, 240(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: addi sp, sp, -384 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 384 +; LMULMAX4-NEXT: sd ra, 376(sp) # 8-byte Folded Spill +; LMULMAX4-NEXT: sd s0, 368(sp) # 8-byte Folded Spill ; LMULMAX4-NEXT: .cfi_offset ra, -8 ; LMULMAX4-NEXT: .cfi_offset s0, -16 -; LMULMAX4-NEXT: addi s0, sp, 256 +; LMULMAX4-NEXT: addi s0, sp, 384 ; LMULMAX4-NEXT: .cfi_def_cfa s0, 0 +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 3 +; LMULMAX4-NEXT: sub sp, sp, a1 ; LMULMAX4-NEXT: andi sp, sp, -128 +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 3 +; LMULMAX4-NEXT: sub a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: li a3, 6 +; LMULMAX4-NEXT: mul a1, a1, a3 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 2 +; LMULMAX4-NEXT: add a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 2 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 1 +; LMULMAX4-NEXT: add a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: 
vs1r.v v28, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 368 +; LMULMAX4-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: addi a1, sp, 368 +; LMULMAX4-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: vle32.v v24, (a0) ; LMULMAX4-NEXT: addi a0, a0, 64 ; LMULMAX4-NEXT: vle32.v v28, (a0) -; LMULMAX4-NEXT: addi a0, sp, 64 +; LMULMAX4-NEXT: addi a0, sp, 192 ; LMULMAX4-NEXT: vse32.v v12, (a0) -; LMULMAX4-NEXT: mv a1, sp -; LMULMAX4-NEXT: mv a0, sp +; LMULMAX4-NEXT: addi a1, sp, 128 +; LMULMAX4-NEXT: addi a0, sp, 128 ; LMULMAX4-NEXT: li a3, 42 ; LMULMAX4-NEXT: vse32.v v8, (a1) ; LMULMAX4-NEXT: vmv.v.v v8, v24 ; LMULMAX4-NEXT: vmv.v.v v12, v28 ; LMULMAX4-NEXT: call ext3@plt -; LMULMAX4-NEXT: addi sp, s0, -256 -; LMULMAX4-NEXT: ld ra, 248(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: ld s0, 240(sp) # 8-byte Folded Reload -; LMULMAX4-NEXT: addi sp, sp, 256 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 3 +; LMULMAX4-NEXT: sub a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: li a1, 6 +; LMULMAX4-NEXT: mul a0, a0, a1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 2 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 2 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 1 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 368 +; LMULMAX4-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: addi a0, sp, 368 +; LMULMAX4-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: addi sp, s0, -384 +; LMULMAX4-NEXT: ld ra, 376(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: ld s0, 368(sp) # 8-byte Folded Reload +; LMULMAX4-NEXT: addi sp, sp, 384 ; LMULMAX4-NEXT: ret %t = call fastcc <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42) ret <32 x i32> %t @@ -445,25 +965,214 @@ define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9, i32 %10, i32 %11, i32 %12, i32 %13, <32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %last) { ; LMULMAX8-LABEL: vector_arg_direct_stack: ; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: addi sp, sp, -16 +; LMULMAX8-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX8-NEXT: csrr a0, vlenb +; 
LMULMAX8-NEXT: slli a0, a0, 3 +; LMULMAX8-NEXT: sub sp, sp, a0 +; LMULMAX8-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 3 +; LMULMAX8-NEXT: sub a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: li a1, 6 +; LMULMAX8-NEXT: mul a0, a0, a1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 2 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 2 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 1 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; LMULMAX8-NEXT: addi a0, sp, 16 +; LMULMAX8-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; LMULMAX8-NEXT: li a0, 32 -; LMULMAX8-NEXT: addi a1, sp, 8 +; LMULMAX8-NEXT: csrr a1, vlenb +; LMULMAX8-NEXT: slli a1, a1, 3 +; LMULMAX8-NEXT: add a1, sp, a1 +; LMULMAX8-NEXT: addi a1, a1, 24 ; LMULMAX8-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; LMULMAX8-NEXT: vle32.v v24, (a1) ; LMULMAX8-NEXT: vadd.vv v8, v8, v16 ; LMULMAX8-NEXT: vadd.vv v8, v8, v24 +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 3 +; LMULMAX8-NEXT: sub a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: li a1, 6 +; LMULMAX8-NEXT: mul a0, a0, a1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 2 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 2 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 1 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v 
v30, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: addi a0, sp, 16 +; LMULMAX8-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 3 +; LMULMAX8-NEXT: add sp, sp, a0 +; LMULMAX8-NEXT: addi sp, sp, 16 ; LMULMAX8-NEXT: ret ; ; LMULMAX4-LABEL: vector_arg_direct_stack: ; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: addi sp, sp, -16 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: sub sp, sp, a0 +; LMULMAX4-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 3 +; LMULMAX4-NEXT: sub a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: li a1, 6 +; LMULMAX4-NEXT: mul a0, a0, a1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 2 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 2 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 1 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; LMULMAX4-NEXT: addi a0, sp, 16 +; LMULMAX4-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; LMULMAX4-NEXT: addi a0, sp, 8 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 24 ; LMULMAX4-NEXT: vle32.v v24, (a0) -; LMULMAX4-NEXT: addi a0, sp, 72 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 88 ; LMULMAX4-NEXT: vle32.v v28, (a0) ; LMULMAX4-NEXT: vadd.vv v12, v12, v20 ; LMULMAX4-NEXT: vadd.vv v8, v8, v16 ; LMULMAX4-NEXT: vadd.vv v8, v8, v24 ; LMULMAX4-NEXT: vadd.vv v12, v12, v28 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 3 +; LMULMAX4-NEXT: sub a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: li a1, 6 +; LMULMAX4-NEXT: mul a0, a0, a1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 2 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, 
vlenb +; LMULMAX4-NEXT: slli a0, a0, 2 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 1 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: addi a0, sp, 16 +; LMULMAX4-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: add sp, sp, a0 +; LMULMAX4-NEXT: addi sp, sp, 16 ; LMULMAX4-NEXT: ret %s = add <32 x i32> %x, %y %t = add <32 x i32> %s, %z diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=8 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX8 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=4 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX4 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=2 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX2 +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -riscv-v-fixed-length-vector-lmul-max=1 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LMULMAX1 define <4 x i8> @ret_v4i8(ptr %p) { ; CHECK-LABEL: ret_v4i8: @@ -581,16 +581,152 @@ define <32 x i32> @ret_v32i32_param_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) { ; LMULMAX8-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: ; LMULMAX8: # %bb.0: +; LMULMAX8-NEXT: addi sp, sp, -16 +; LMULMAX8-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 3 +; LMULMAX8-NEXT: sub sp, sp, a2 +; LMULMAX8-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 
0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 3 +; LMULMAX8-NEXT: sub a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: li a3, 6 +; LMULMAX8-NEXT: mul a2, a2, a3 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 2 +; LMULMAX8-NEXT: add a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a3, a2, 1 +; LMULMAX8-NEXT: add a2, a3, a2 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: slli a2, a2, 1 +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: csrr a2, vlenb +; LMULMAX8-NEXT: add a2, sp, a2 +; LMULMAX8-NEXT: addi a2, a2, 16 +; LMULMAX8-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; LMULMAX8-NEXT: addi a2, sp, 16 +; LMULMAX8-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; LMULMAX8-NEXT: li a2, 32 ; LMULMAX8-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; LMULMAX8-NEXT: vle32.v v24, (a0) ; LMULMAX8-NEXT: vadd.vv v8, v8, v16 ; LMULMAX8-NEXT: vadd.vv v8, v8, v24 ; LMULMAX8-NEXT: vadd.vx v8, v8, a1 +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 3 +; LMULMAX8-NEXT: sub a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: li a1, 6 +; LMULMAX8-NEXT: mul a0, a0, a1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 2 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 2 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a1, a0, 1 +; LMULMAX8-NEXT: add a0, a1, a0 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 1 +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: add a0, sp, a0 +; LMULMAX8-NEXT: addi a0, a0, 16 +; LMULMAX8-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: addi a0, sp, 16 +; LMULMAX8-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX8-NEXT: csrr a0, vlenb +; LMULMAX8-NEXT: slli a0, a0, 3 +; LMULMAX8-NEXT: add sp, sp, a0 +; LMULMAX8-NEXT: addi sp, sp, 16 ; LMULMAX8-NEXT: ret 
; ; LMULMAX4-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: ; LMULMAX4: # %bb.0: +; LMULMAX4-NEXT: addi sp, sp, -16 +; LMULMAX4-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 3 +; LMULMAX4-NEXT: sub sp, sp, a1 +; LMULMAX4-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 3 +; LMULMAX4-NEXT: sub a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: li a3, 6 +; LMULMAX4-NEXT: mul a1, a1, a3 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 2 +; LMULMAX4-NEXT: add a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 2 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a3, a1, 1 +; LMULMAX4-NEXT: add a1, a3, a1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: slli a1, a1, 1 +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: csrr a1, vlenb +; LMULMAX4-NEXT: add a1, sp, a1 +; LMULMAX4-NEXT: addi a1, a1, 16 +; LMULMAX4-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; LMULMAX4-NEXT: addi a1, sp, 16 +; LMULMAX4-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; LMULMAX4-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; LMULMAX4-NEXT: addi a1, a0, 64 ; LMULMAX4-NEXT: vle32.v v24, (a1) @@ -601,10 +737,100 @@ ; LMULMAX4-NEXT: vadd.vv v8, v8, v28 ; LMULMAX4-NEXT: vadd.vx v8, v8, a2 ; LMULMAX4-NEXT: vadd.vx v12, v12, a2 +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 3 +; LMULMAX4-NEXT: sub a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: li a1, 6 +; LMULMAX4-NEXT: mul a0, a0, a1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 2 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 2 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a1, a0, 1 +; LMULMAX4-NEXT: add a0, a1, a0 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 1 +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, 
vlenb +; LMULMAX4-NEXT: add a0, sp, a0 +; LMULMAX4-NEXT: addi a0, a0, 16 +; LMULMAX4-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: addi a0, sp, 16 +; LMULMAX4-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; LMULMAX4-NEXT: csrr a0, vlenb +; LMULMAX4-NEXT: slli a0, a0, 3 +; LMULMAX4-NEXT: add sp, sp, a0 +; LMULMAX4-NEXT: addi sp, sp, 16 ; LMULMAX4-NEXT: ret ; ; LMULMAX2-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32: ; LMULMAX2: # %bb.0: +; LMULMAX2-NEXT: addi sp, sp, -16 +; LMULMAX2-NEXT: .cfi_def_cfa_offset 16 +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: slli a1, a1, 3 +; LMULMAX2-NEXT: sub sp, sp, a1 +; LMULMAX2-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: slli a2, a1, 3 +; LMULMAX2-NEXT: sub a1, a2, a1 +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: li a2, 6 +; LMULMAX2-NEXT: mul a1, a1, a2 +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: slli a2, a1, 2 +; LMULMAX2-NEXT: add a1, a2, a1 +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: slli a1, a1, 2 +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: slli a2, a1, 1 +; LMULMAX2-NEXT: add a1, a2, a1 +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: slli a1, a1, 1 +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: csrr a1, vlenb +; LMULMAX2-NEXT: add a1, sp, a1 +; LMULMAX2-NEXT: addi a1, a1, 16 +; LMULMAX2-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; LMULMAX2-NEXT: addi a1, sp, 16 +; LMULMAX2-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; LMULMAX2-NEXT: vle32.v v24, (a0) ; LMULMAX2-NEXT: addi a1, a0, 32 @@ -625,27 +851,114 @@ ; LMULMAX2-NEXT: vadd.vx v10, v10, a4 ; LMULMAX2-NEXT: vadd.vx v12, v12, a4 ; LMULMAX2-NEXT: vadd.vx v14, v14, a4 +; LMULMAX2-NEXT: csrr a0, vlenb +; LMULMAX2-NEXT: slli a1, a0, 3 +; LMULMAX2-NEXT: sub a0, a1, a0 +; LMULMAX2-NEXT: add a0, sp, a0 +; LMULMAX2-NEXT: addi a0, a0, 16 +; LMULMAX2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; LMULMAX2-NEXT: csrr a0, vlenb +; LMULMAX2-NEXT: li a1, 6 +; LMULMAX2-NEXT: mul a0, a0, a1 +; LMULMAX2-NEXT: add a0, sp, a0 +; LMULMAX2-NEXT: addi a0, a0, 16 +; LMULMAX2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; LMULMAX2-NEXT: csrr a0, vlenb +; LMULMAX2-NEXT: slli a1, a0, 2 +; LMULMAX2-NEXT: add a0, a1, a0 +; LMULMAX2-NEXT: add a0, sp, a0 +; LMULMAX2-NEXT: addi a0, a0, 16 +; LMULMAX2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; LMULMAX2-NEXT: csrr a0, vlenb +; LMULMAX2-NEXT: slli a0, a0, 2 +; LMULMAX2-NEXT: add a0, sp, a0 +; LMULMAX2-NEXT: addi a0, a0, 16 +; LMULMAX2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; LMULMAX2-NEXT: csrr a0, vlenb +; LMULMAX2-NEXT: slli a1, a0, 1 
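; (editorial note, not produced by update_llc_test_checks: the reload
; addresses above and below rebuild "sp + 16 + k * vlenb" for k = 7..0.
; Multipliers with a cheap shift decomposition use it, e.g.
; 7*vlenb = (vlenb << 3) - vlenb and 5*vlenb = (vlenb << 2) + vlenb,
; while k = 6 has none and falls back to li a1, 6; mul, which is why the
; RUN lines of these test files now pass -mattr=+m alongside +v.)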
+; LMULMAX2-NEXT:    add a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 16
+; LMULMAX2-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 1
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 16
+; LMULMAX2-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 16
+; LMULMAX2-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    addi a0, sp, 16
+; LMULMAX2-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 3
+; LMULMAX2-NEXT:    add sp, sp, a0
+; LMULMAX2-NEXT:    addi sp, sp, 16
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i32_param_v32i32_v32i32_v32i32_i32:
; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi sp, sp, -16
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 16
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 3
+; LMULMAX1-NEXT:    sub sp, sp, a1
+; LMULMAX1-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    li a2, 6
+; LMULMAX1-NEXT:    mul a1, a1, a2
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a2, a1, 2
+; LMULMAX1-NEXT:    add a1, a2, a1
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 2
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a2, a1, 1
+; LMULMAX1-NEXT:    add a1, a2, a1
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 1
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    addi a1, sp, 16
+; LMULMAX1-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v24, (a0)
+; LMULMAX1-NEXT:    vle32.v v0, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-NEXT:    vle32.v v24, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
+; LMULMAX1-NEXT:    vle32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v27, (a1)
+; LMULMAX1-NEXT:    vle32.v v26, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vle32.v v28, (a1)
+; LMULMAX1-NEXT:    vle32.v v27, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vle32.v v29, (a1)
+; LMULMAX1-NEXT:    vle32.v v28, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vle32.v v30, (a1)
+; LMULMAX1-NEXT:    vle32.v v29, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 112
-; LMULMAX1-NEXT:    vle32.v v31, (a0)
-; LMULMAX1-NEXT:    lw a0, 0(sp)
+; LMULMAX1-NEXT:    vle32.v v30, (a0)
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 3
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    lw a0, 16(a0)
; LMULMAX1-NEXT:    vadd.vv v8, v8, v16
; LMULMAX1-NEXT:    vadd.vv v9, v9, v17
; LMULMAX1-NEXT:    vadd.vv v10, v10, v18
@@ -654,14 +967,14 @@
; LMULMAX1-NEXT:    vadd.vv v13, v13, v21
; LMULMAX1-NEXT:    vadd.vv v14, v14, v22
; LMULMAX1-NEXT:    vadd.vv v15, v15, v23
-; LMULMAX1-NEXT:    vadd.vv v15, v15, v31
-; LMULMAX1-NEXT:    vadd.vv v14, v14, v30
-; LMULMAX1-NEXT:    vadd.vv v13, v13, v29
-; LMULMAX1-NEXT:    vadd.vv v12, v12, v28
-; LMULMAX1-NEXT:    vadd.vv v11, v11, v27
-; LMULMAX1-NEXT:    vadd.vv v10, v10, v26
-; LMULMAX1-NEXT:    vadd.vv v9, v9, v25
-; LMULMAX1-NEXT:    vadd.vv v8, v8, v24
+; LMULMAX1-NEXT:    vadd.vv v15, v15, v30
+; LMULMAX1-NEXT:    vadd.vv v14, v14, v29
+; LMULMAX1-NEXT:    vadd.vv v13, v13, v28
+; LMULMAX1-NEXT:    vadd.vv v12, v12, v27
+; LMULMAX1-NEXT:    vadd.vv v11, v11, v26
+; LMULMAX1-NEXT:    vadd.vv v10, v10, v25
+; LMULMAX1-NEXT:    vadd.vv v9, v9, v24
+; LMULMAX1-NEXT:    vadd.vv v8, v8, v0
; LMULMAX1-NEXT:    vadd.vx v8, v8, a0
; LMULMAX1-NEXT:    vadd.vx v9, v9, a0
; LMULMAX1-NEXT:    vadd.vx v10, v10, a0
@@ -670,6 +983,44 @@
; LMULMAX1-NEXT:    vadd.vx v13, v13, a0
; LMULMAX1-NEXT:    vadd.vx v14, v14, a0
; LMULMAX1-NEXT:    vadd.vx v15, v15, a0
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    li a1, 6
+; LMULMAX1-NEXT:    mul a0, a0, a1
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a1, a0, 2
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 2
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a1, a0, 1
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 1
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    addi a0, sp, 16
+; LMULMAX1-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 3
+; LMULMAX1-NEXT:    add sp, sp, a0
+; LMULMAX1-NEXT:    addi sp, sp, 16
; LMULMAX1-NEXT:    ret
%r = add <32 x i32> %x, %y
%s = add <32 x i32> %r, %z
@@ -685,25 +1036,156 @@
define <32 x i32> @ret_v32i32_call_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, i32 %w) {
; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -16
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX8-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; LMULMAX8-NEXT:    addi sp, sp, -48
+; LMULMAX8-NEXT:    .cfi_def_cfa_offset 48
+; LMULMAX8-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    slli a1, a1, 3
+; LMULMAX8-NEXT:    sub sp, sp, a1
+; LMULMAX8-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    slli a2, a1, 3
+; LMULMAX8-NEXT:    sub a1, a2, a1
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    li a2, 6
+; LMULMAX8-NEXT:    mul a1, a1, a2
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    slli a2, a1, 2
+; LMULMAX8-NEXT:    add a1, a2, a1
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    slli a1, a1, 2
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    slli a2, a1, 1
+; LMULMAX8-NEXT:    add a1, a2, a1
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    slli a1, a1, 1
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a1, vlenb
+; LMULMAX8-NEXT:    add a1, sp, a1
+; LMULMAX8-NEXT:    addi a1, a1, 32
+; LMULMAX8-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    addi a1, sp, 32
+; LMULMAX8-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; LMULMAX8-NEXT:    vmv8r.v v24, v8
; LMULMAX8-NEXT:    li a1, 2
; LMULMAX8-NEXT:    vmv8r.v v8, v16
; LMULMAX8-NEXT:    vmv8r.v v16, v24
; LMULMAX8-NEXT:    call ext2@plt
-; LMULMAX8-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 16
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a1, a0, 3
+; LMULMAX8-NEXT:    sub a0, a1, a0
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    li a1, 6
+; LMULMAX8-NEXT:    mul a0, a0, a1
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a1, a0, 2
+; LMULMAX8-NEXT:    add a0, a1, a0
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a0, a0, 2
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a1, a0, 1
+; LMULMAX8-NEXT:    add a0, a1, a0
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a0, a0, 1
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 32
+; LMULMAX8-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    addi a0, sp, 32
+; LMULMAX8-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a0, a0, 3
+; LMULMAX8-NEXT:    add sp, sp, a0
+; LMULMAX8-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    addi sp, sp, 48
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -16
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX4-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; LMULMAX4-NEXT:    addi sp, sp, -48
+; LMULMAX4-NEXT:    .cfi_def_cfa_offset 48
+; LMULMAX4-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a1, a1, 3
+; LMULMAX4-NEXT:    sub sp, sp, a1
+; LMULMAX4-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a2, a1, 3
+; LMULMAX4-NEXT:    sub a1, a2, a1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    li a2, 6
+; LMULMAX4-NEXT:    mul a1, a1, a2
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a2, a1, 2
+; LMULMAX4-NEXT:    add a1, a2, a1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a1, a1, 2
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a2, a1, 1
+; LMULMAX4-NEXT:    add a1, a2, a1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a1, a1, 1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 32
+; LMULMAX4-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    addi a1, sp, 32
+; LMULMAX4-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; LMULMAX4-NEXT:    vmv4r.v v24, v12
; LMULMAX4-NEXT:    vmv4r.v v28, v8
; LMULMAX4-NEXT:    li a1, 2
@@ -712,16 +1194,103 @@
; LMULMAX4-NEXT:    vmv4r.v v16, v28
; LMULMAX4-NEXT:    vmv4r.v v20, v24
; LMULMAX4-NEXT:    call ext2@plt
-; LMULMAX4-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 16
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a1, a0, 3
+; LMULMAX4-NEXT:    sub a0, a1, a0
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    li a1, 6
+; LMULMAX4-NEXT:    mul a0, a0, a1
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a1, a0, 2
+; LMULMAX4-NEXT:    add a0, a1, a0
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a0, a0, 2
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a1, a0, 1
+; LMULMAX4-NEXT:    add a0, a1, a0
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a0, a0, 1
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 32
+; LMULMAX4-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    addi a0, sp, 32
+; LMULMAX4-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a0, a0, 3
+; LMULMAX4-NEXT:    add sp, sp, a0
+; LMULMAX4-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    addi sp, sp, 48
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -16
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX2-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    addi sp, sp, -48
+; LMULMAX2-NEXT:    .cfi_def_cfa_offset 48
+; LMULMAX2-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 3
+; LMULMAX2-NEXT:    sub sp, sp, a1
+; LMULMAX2-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a2, a1, 3
+; LMULMAX2-NEXT:    sub a1, a2, a1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    li a2, 6
+; LMULMAX2-NEXT:    mul a1, a1, a2
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a2, a1, 2
+; LMULMAX2-NEXT:    add a1, a2, a1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 2
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a2, a1, 1
+; LMULMAX2-NEXT:    add a1, a2, a1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 32
+; LMULMAX2-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    addi a1, sp, 32
+; LMULMAX2-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; LMULMAX2-NEXT:    vmv2r.v v24, v14
; LMULMAX2-NEXT:    vmv2r.v v26, v12
; LMULMAX2-NEXT:    vmv2r.v v28, v10
@@ -736,24 +1305,105 @@
; LMULMAX2-NEXT:    vmv2r.v v20, v26
; LMULMAX2-NEXT:    vmv2r.v v22, v24
; LMULMAX2-NEXT:    call ext2@plt
-; LMULMAX2-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 16
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a1, a0, 3
+; LMULMAX2-NEXT:    sub a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    li a1, 6
+; LMULMAX2-NEXT:    mul a0, a0, a1
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a1, a0, 2
+; LMULMAX2-NEXT:    add a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 2
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a1, a0, 1
+; LMULMAX2-NEXT:    add a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 1
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 32
+; LMULMAX2-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    addi a0, sp, 32
+; LMULMAX2-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 3
+; LMULMAX2-NEXT:    add sp, sp, a0
+; LMULMAX2-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    addi sp, sp, 48
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_i32:
; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -16
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 16
-; LMULMAX1-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    addi sp, sp, -48
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 48
+; LMULMAX1-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
-; LMULMAX1-NEXT:    vmv1r.v v24, v15
-; LMULMAX1-NEXT:    vmv1r.v v25, v14
-; LMULMAX1-NEXT:    vmv1r.v v26, v13
-; LMULMAX1-NEXT:    vmv1r.v v27, v12
-; LMULMAX1-NEXT:    vmv1r.v v28, v11
-; LMULMAX1-NEXT:    vmv1r.v v29, v10
-; LMULMAX1-NEXT:    vmv1r.v v30, v9
-; LMULMAX1-NEXT:    vmv1r.v v31, v8
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 3
+; LMULMAX1-NEXT:    sub sp, sp, a1
+; LMULMAX1-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    li a2, 6
+; LMULMAX1-NEXT:    mul a1, a1, a2
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 32
+; LMULMAX1-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a2, a1, 2
+; LMULMAX1-NEXT:    add a1, a2, a1
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 32
+; LMULMAX1-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 2
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 32
+; LMULMAX1-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a2, a1, 1
+; LMULMAX1-NEXT:    add a1, a2, a1
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 32
+; LMULMAX1-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 1
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 32
+; LMULMAX1-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 32
+; LMULMAX1-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    addi a1, sp, 32
+; LMULMAX1-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    vmv1r.v v0, v15
+; LMULMAX1-NEXT:    vmv1r.v v24, v14
+; LMULMAX1-NEXT:    vmv1r.v v25, v13
+; LMULMAX1-NEXT:    vmv1r.v v26, v12
+; LMULMAX1-NEXT:    vmv1r.v v27, v11
+; LMULMAX1-NEXT:    vmv1r.v v28, v10
+; LMULMAX1-NEXT:    vmv1r.v v29, v9
+; LMULMAX1-NEXT:    vmv1r.v v30, v8
; LMULMAX1-NEXT:    li a1, 2
; LMULMAX1-NEXT:    vmv1r.v v8, v16
; LMULMAX1-NEXT:    vmv1r.v v9, v17
@@ -763,17 +1413,54 @@
; LMULMAX1-NEXT:    vmv1r.v v13, v21
; LMULMAX1-NEXT:    vmv1r.v v14, v22
; LMULMAX1-NEXT:    vmv1r.v v15, v23
-; LMULMAX1-NEXT:    vmv1r.v v16, v31
-; LMULMAX1-NEXT:    vmv1r.v v17, v30
-; LMULMAX1-NEXT:    vmv1r.v v18, v29
-; LMULMAX1-NEXT:    vmv1r.v v19, v28
-; LMULMAX1-NEXT:    vmv1r.v v20, v27
-; LMULMAX1-NEXT:    vmv1r.v v21, v26
-; LMULMAX1-NEXT:    vmv1r.v v22, v25
-; LMULMAX1-NEXT:    vmv1r.v v23, v24
+; LMULMAX1-NEXT:    vmv1r.v v16, v30
+; LMULMAX1-NEXT:    vmv1r.v v17, v29
+; LMULMAX1-NEXT:    vmv1r.v v18, v28
+; LMULMAX1-NEXT:    vmv1r.v v19, v27
+; LMULMAX1-NEXT:    vmv1r.v v20, v26
+; LMULMAX1-NEXT:    vmv1r.v v21, v25
+; LMULMAX1-NEXT:    vmv1r.v v22, v24
+; LMULMAX1-NEXT:    vmv1r.v v23, v0
; LMULMAX1-NEXT:    call ext2@plt
-; LMULMAX1-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 16
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    li a1, 6
+; LMULMAX1-NEXT:    mul a0, a0, a1
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 32
+; LMULMAX1-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a1, a0, 2
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 32
+; LMULMAX1-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 2
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 32
+; LMULMAX1-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a1, a0, 1
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 32
+; LMULMAX1-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 1
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 32
+; LMULMAX1-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 32
+; LMULMAX1-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    addi a0, sp, 32
+; LMULMAX1-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 3
+; LMULMAX1-NEXT:    add sp, sp, a0
+; LMULMAX1-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    addi sp, sp, 48
; LMULMAX1-NEXT:    ret
%t = call <32 x i32> @ext2(<32 x i32> %y, <32 x i32> %x, i32 %w, i32 2)
ret <32 x i32> %t
@@ -782,71 +1469,280 @@
define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z, i32 %w) {
; LMULMAX8-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX8:       # %bb.0:
-; LMULMAX8-NEXT:    addi sp, sp, -256
-; LMULMAX8-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX8-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX8-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX8-NEXT:    addi sp, sp, -384
+; LMULMAX8-NEXT:    .cfi_def_cfa_offset 384
+; LMULMAX8-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
+; LMULMAX8-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; LMULMAX8-NEXT:    .cfi_offset ra, -8
; LMULMAX8-NEXT:    .cfi_offset s0, -16
-; LMULMAX8-NEXT:    addi s0, sp, 256
+; LMULMAX8-NEXT:    addi s0, sp, 384
; LMULMAX8-NEXT:    .cfi_def_cfa s0, 0
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    slli a2, a2, 3
+; LMULMAX8-NEXT:    sub sp, sp, a2
; LMULMAX8-NEXT:    andi sp, sp, -128
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    slli a3, a2, 3
+; LMULMAX8-NEXT:    sub a2, a3, a2
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v24, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    li a3, 6
+; LMULMAX8-NEXT:    mul a2, a2, a3
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v25, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    slli a3, a2, 2
+; LMULMAX8-NEXT:    add a2, a3, a2
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v26, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    slli a2, a2, 2
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v27, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    slli a3, a2, 1
+; LMULMAX8-NEXT:    add a2, a3, a2
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v28, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    slli a2, a2, 1
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v29, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    csrr a2, vlenb
+; LMULMAX8-NEXT:    add a2, sp, a2
+; LMULMAX8-NEXT:    addi a2, a2, 368
+; LMULMAX8-NEXT:    vs1r.v v30, (a2) # Unknown-size Folded Spill
+; LMULMAX8-NEXT:    addi a2, sp, 368
+; LMULMAX8-NEXT:    vs1r.v v31, (a2) # Unknown-size Folded Spill
; LMULMAX8-NEXT:    li a2, 32
; LMULMAX8-NEXT:    vsetvli zero, a2, e32, m8, ta, ma
; LMULMAX8-NEXT:    vle32.v v24, (a0)
-; LMULMAX8-NEXT:    mv a3, sp
-; LMULMAX8-NEXT:    mv a0, sp
+; LMULMAX8-NEXT:    addi a3, sp, 128
+; LMULMAX8-NEXT:    addi a0, sp, 128
; LMULMAX8-NEXT:    li a2, 42
; LMULMAX8-NEXT:    vse32.v v8, (a3)
; LMULMAX8-NEXT:    vmv.v.v v8, v24
; LMULMAX8-NEXT:    call ext3@plt
-; LMULMAX8-NEXT:    addi sp, s0, -256
-; LMULMAX8-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX8-NEXT:    addi sp, sp, 256
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a1, a0, 3
+; LMULMAX8-NEXT:    sub a0, a1, a0
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    li a1, 6
+; LMULMAX8-NEXT:    mul a0, a0, a1
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a1, a0, 2
+; LMULMAX8-NEXT:    add a0, a1, a0
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a0, a0, 2
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a1, a0, 1
+; LMULMAX8-NEXT:    add a0, a1, a0
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    slli a0, a0, 1
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    csrr a0, vlenb
+; LMULMAX8-NEXT:    add a0, sp, a0
+; LMULMAX8-NEXT:    addi a0, a0, 368
+; LMULMAX8-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    addi a0, sp, 368
+; LMULMAX8-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX8-NEXT:    addi sp, s0, -384
+; LMULMAX8-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
+; LMULMAX8-NEXT:    addi sp, sp, 384
; LMULMAX8-NEXT:    ret
;
; LMULMAX4-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX4:       # %bb.0:
-; LMULMAX4-NEXT:    addi sp, sp, -256
-; LMULMAX4-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX4-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX4-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX4-NEXT:    addi sp, sp, -384
+; LMULMAX4-NEXT:    .cfi_def_cfa_offset 384
+; LMULMAX4-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
+; LMULMAX4-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; LMULMAX4-NEXT:    .cfi_offset ra, -8
; LMULMAX4-NEXT:    .cfi_offset s0, -16
-; LMULMAX4-NEXT:    addi s0, sp, 256
+; LMULMAX4-NEXT:    addi s0, sp, 384
; LMULMAX4-NEXT:    .cfi_def_cfa s0, 0
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a1, a1, 3
+; LMULMAX4-NEXT:    sub sp, sp, a1
; LMULMAX4-NEXT:    andi sp, sp, -128
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a3, a1, 3
+; LMULMAX4-NEXT:    sub a1, a3, a1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    li a3, 6
+; LMULMAX4-NEXT:    mul a1, a1, a3
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a3, a1, 2
+; LMULMAX4-NEXT:    add a1, a3, a1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a1, a1, 2
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a3, a1, 1
+; LMULMAX4-NEXT:    add a1, a3, a1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    slli a1, a1, 1
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    csrr a1, vlenb
+; LMULMAX4-NEXT:    add a1, sp, a1
+; LMULMAX4-NEXT:    addi a1, a1, 368
+; LMULMAX4-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; LMULMAX4-NEXT:    addi a1, sp, 368
+; LMULMAX4-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; LMULMAX4-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
; LMULMAX4-NEXT:    vle32.v v24, (a0)
; LMULMAX4-NEXT:    addi a0, a0, 64
; LMULMAX4-NEXT:    vle32.v v28, (a0)
-; LMULMAX4-NEXT:    addi a0, sp, 64
+; LMULMAX4-NEXT:    addi a0, sp, 192
; LMULMAX4-NEXT:    vse32.v v12, (a0)
-; LMULMAX4-NEXT:    mv a1, sp
-; LMULMAX4-NEXT:    mv a0, sp
+; LMULMAX4-NEXT:    addi a1, sp, 128
+; LMULMAX4-NEXT:    addi a0, sp, 128
; LMULMAX4-NEXT:    li a3, 42
; LMULMAX4-NEXT:    vse32.v v8, (a1)
; LMULMAX4-NEXT:    vmv.v.v v8, v24
; LMULMAX4-NEXT:    vmv.v.v v12, v28
; LMULMAX4-NEXT:    call ext3@plt
-; LMULMAX4-NEXT:    addi sp, s0, -256
-; LMULMAX4-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX4-NEXT:    addi sp, sp, 256
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a1, a0, 3
+; LMULMAX4-NEXT:    sub a0, a1, a0
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    li a1, 6
+; LMULMAX4-NEXT:    mul a0, a0, a1
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a1, a0, 2
+; LMULMAX4-NEXT:    add a0, a1, a0
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a0, a0, 2
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a1, a0, 1
+; LMULMAX4-NEXT:    add a0, a1, a0
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    slli a0, a0, 1
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    csrr a0, vlenb
+; LMULMAX4-NEXT:    add a0, sp, a0
+; LMULMAX4-NEXT:    addi a0, a0, 368
+; LMULMAX4-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    addi a0, sp, 368
+; LMULMAX4-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX4-NEXT:    addi sp, s0, -384
+; LMULMAX4-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
+; LMULMAX4-NEXT:    addi sp, sp, 384
; LMULMAX4-NEXT:    ret
;
; LMULMAX2-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi sp, sp, -256
-; LMULMAX2-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX2-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX2-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    addi sp, sp, -384
+; LMULMAX2-NEXT:    .cfi_def_cfa_offset 384
+; LMULMAX2-NEXT:    sd ra, 376(sp) # 8-byte Folded Spill
+; LMULMAX2-NEXT:    sd s0, 368(sp) # 8-byte Folded Spill
; LMULMAX2-NEXT:    .cfi_offset ra, -8
; LMULMAX2-NEXT:    .cfi_offset s0, -16
-; LMULMAX2-NEXT:    addi s0, sp, 256
+; LMULMAX2-NEXT:    addi s0, sp, 384
; LMULMAX2-NEXT:    .cfi_def_cfa s0, 0
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 3
+; LMULMAX2-NEXT:    sub sp, sp, a1
; LMULMAX2-NEXT:    andi sp, sp, -128
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a2, a1, 3
+; LMULMAX2-NEXT:    sub a1, a2, a1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    li a2, 6
+; LMULMAX2-NEXT:    mul a1, a1, a2
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a2, a1, 2
+; LMULMAX2-NEXT:    add a1, a2, a1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 2
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a2, a1, 1
+; LMULMAX2-NEXT:    add a1, a2, a1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 1
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 368
+; LMULMAX2-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    addi a1, sp, 368
+; LMULMAX2-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v24, (a0)
; LMULMAX2-NEXT:    addi a1, a0, 32
@@ -855,14 +1751,14 @@
; LMULMAX2-NEXT:    vle32.v v28, (a1)
; LMULMAX2-NEXT:    addi a0, a0, 96
; LMULMAX2-NEXT:    vle32.v v30, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 96
+; LMULMAX2-NEXT:    addi a0, sp, 224
; LMULMAX2-NEXT:    vse32.v v14, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 64
+; LMULMAX2-NEXT:    addi a0, sp, 192
; LMULMAX2-NEXT:    vse32.v v12, (a0)
-; LMULMAX2-NEXT:    addi a0, sp, 32
+; LMULMAX2-NEXT:    addi a0, sp, 160
; LMULMAX2-NEXT:    vse32.v v10, (a0)
-; LMULMAX2-NEXT:    mv a1, sp
-; LMULMAX2-NEXT:    mv a0, sp
+; LMULMAX2-NEXT:    addi a1, sp, 128
+; LMULMAX2-NEXT:    addi a0, sp, 128
; LMULMAX2-NEXT:    li a5, 42
; LMULMAX2-NEXT:    vse32.v v8, (a1)
; LMULMAX2-NEXT:    vmv.v.v v8, v24
@@ -870,79 +1766,190 @@
; LMULMAX2-NEXT:    vmv.v.v v12, v28
; LMULMAX2-NEXT:    vmv.v.v v14, v30
; LMULMAX2-NEXT:    call ext3@plt
-; LMULMAX2-NEXT:    addi sp, s0, -256
-; LMULMAX2-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX2-NEXT:    addi sp, sp, 256
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a1, a0, 3
+; LMULMAX2-NEXT:    sub a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    li a1, 6
+; LMULMAX2-NEXT:    mul a0, a0, a1
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a1, a0, 2
+; LMULMAX2-NEXT:    add a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 2
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a1, a0, 1
+; LMULMAX2-NEXT:    add a0, a1, a0
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 1
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 368
+; LMULMAX2-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    addi a0, sp, 368
+; LMULMAX2-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    addi sp, s0, -384
+; LMULMAX2-NEXT:    ld ra, 376(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    ld s0, 368(sp) # 8-byte Folded Reload
+; LMULMAX2-NEXT:    addi sp, sp, 384
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: ret_v32i32_call_v32i32_v32i32_v32i32_i32:
; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -256
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 256
-; LMULMAX1-NEXT:    sd ra, 248(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s0, 240(sp) # 8-byte Folded Spill
-; LMULMAX1-NEXT:    sd s1, 232(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    addi sp, sp, -400
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 400
+; LMULMAX1-NEXT:    sd ra, 392(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s0, 384(sp) # 8-byte Folded Spill
+; LMULMAX1-NEXT:    sd s1, 376(sp) # 8-byte Folded Spill
; LMULMAX1-NEXT:    .cfi_offset ra, -8
; LMULMAX1-NEXT:    .cfi_offset s0, -16
; LMULMAX1-NEXT:    .cfi_offset s1, -24
-; LMULMAX1-NEXT:    addi s0, sp, 256
+; LMULMAX1-NEXT:    addi s0, sp, 400
; LMULMAX1-NEXT:    .cfi_def_cfa s0, 0
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 3
+; LMULMAX1-NEXT:    sub sp, sp, a1
; LMULMAX1-NEXT:    andi sp, sp, -128
; LMULMAX1-NEXT:    mv s1, sp
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    li a2, 6
+; LMULMAX1-NEXT:    mul a1, a1, a2
+; LMULMAX1-NEXT:    add a1, s1, a1
+; LMULMAX1-NEXT:    addi a1, a1, 368
+; LMULMAX1-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a2, a1, 2
+; LMULMAX1-NEXT:    add a1, a2, a1
+; LMULMAX1-NEXT:    add a1, s1, a1
+; LMULMAX1-NEXT:    addi a1, a1, 368
+; LMULMAX1-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 2
+; LMULMAX1-NEXT:    add a1, s1, a1
+; LMULMAX1-NEXT:    addi a1, a1, 368
+; LMULMAX1-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a2, a1, 1
+; LMULMAX1-NEXT:    add a1, a2, a1
+; LMULMAX1-NEXT:    add a1, s1, a1
+; LMULMAX1-NEXT:    addi a1, a1, 368
+; LMULMAX1-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 1
+; LMULMAX1-NEXT:    add a1, s1, a1
+; LMULMAX1-NEXT:    addi a1, a1, 368
+; LMULMAX1-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    add a1, s1, a1
+; LMULMAX1-NEXT:    addi a1, a1, 368
+; LMULMAX1-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    addi a1, s1, 368
+; LMULMAX1-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v24, (a0)
+; LMULMAX1-NEXT:    vle32.v v0, (a0)
; LMULMAX1-NEXT:    addi a1, a0, 16
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-NEXT:    vle32.v v24, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
+; LMULMAX1-NEXT:    vle32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v27, (a1)
+; LMULMAX1-NEXT:    vle32.v v26, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 64
-; LMULMAX1-NEXT:    vle32.v v28, (a1)
+; LMULMAX1-NEXT:    vle32.v v27, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 80
-; LMULMAX1-NEXT:    vle32.v v29, (a1)
+; LMULMAX1-NEXT:    vle32.v v28, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 96
-; LMULMAX1-NEXT:    vle32.v v30, (a1)
+; LMULMAX1-NEXT:    vle32.v v29, (a1)
; LMULMAX1-NEXT:    addi a0, a0, 112
-; LMULMAX1-NEXT:    vle32.v v31, (a0)
+; LMULMAX1-NEXT:    vle32.v v30, (a0)
; LMULMAX1-NEXT:    ld a1, 0(s0)
; LMULMAX1-NEXT:    addi sp, sp, -16
-; LMULMAX1-NEXT:    addi a0, s1, 112
+; LMULMAX1-NEXT:    addi a0, s1, 240
; LMULMAX1-NEXT:    vse32.v v15, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 96
+; LMULMAX1-NEXT:    addi a0, s1, 224
; LMULMAX1-NEXT:    vse32.v v14, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 80
+; LMULMAX1-NEXT:    addi a0, s1, 208
; LMULMAX1-NEXT:    vse32.v v13, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 64
+; LMULMAX1-NEXT:    addi a0, s1, 192
; LMULMAX1-NEXT:    vse32.v v12, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 48
+; LMULMAX1-NEXT:    addi a0, s1, 176
; LMULMAX1-NEXT:    vse32.v v11, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 32
+; LMULMAX1-NEXT:    addi a0, s1, 160
; LMULMAX1-NEXT:    vse32.v v10, (a0)
-; LMULMAX1-NEXT:    addi a0, s1, 16
+; LMULMAX1-NEXT:    addi a0, s1, 144
; LMULMAX1-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-NEXT:    mv a0, s1
+; LMULMAX1-NEXT:    addi a0, s1, 128
; LMULMAX1-NEXT:    vse32.v v8, (a0)
; LMULMAX1-NEXT:    li a0, 42
; LMULMAX1-NEXT:    sd a0, 8(sp)
-; LMULMAX1-NEXT:    mv a0, s1
+; LMULMAX1-NEXT:    addi a0, s1, 128
; LMULMAX1-NEXT:    sd a1, 0(sp)
-; LMULMAX1-NEXT:    vmv.v.v v8, v24
-; LMULMAX1-NEXT:    vmv.v.v v9, v25
-; LMULMAX1-NEXT:    vmv.v.v v10, v26
-; LMULMAX1-NEXT:    vmv.v.v v11, v27
-; LMULMAX1-NEXT:    vmv.v.v v12, v28
-; LMULMAX1-NEXT:    vmv.v.v v13, v29
-; LMULMAX1-NEXT:    vmv.v.v v14, v30
-; LMULMAX1-NEXT:    vmv.v.v v15, v31
+; LMULMAX1-NEXT:    vmv.v.v v8, v0
+; LMULMAX1-NEXT:    vmv.v.v v9, v24
+; LMULMAX1-NEXT:    vmv.v.v v10, v25
+; LMULMAX1-NEXT:    vmv.v.v v11, v26
+; LMULMAX1-NEXT:    vmv.v.v v12, v27
+; LMULMAX1-NEXT:    vmv.v.v v13, v28
+; LMULMAX1-NEXT:    vmv.v.v v14, v29
+; LMULMAX1-NEXT:    vmv.v.v v15, v30
; LMULMAX1-NEXT:    call ext3@plt
; LMULMAX1-NEXT:    addi sp, sp, 16
-; LMULMAX1-NEXT:    addi sp, s0, -256
-; LMULMAX1-NEXT:    ld ra, 248(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld s0, 240(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    ld s1, 232(sp) # 8-byte Folded Reload
-; LMULMAX1-NEXT:    addi sp, sp, 256
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    li a1, 6
+; LMULMAX1-NEXT:    mul a0, a0, a1
+; LMULMAX1-NEXT:    add a0, s1, a0
+; LMULMAX1-NEXT:    addi a0, a0, 368
+; LMULMAX1-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a1, a0, 2
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    add a0, s1, a0
+; LMULMAX1-NEXT:    addi a0, a0, 368
+; LMULMAX1-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 2
+; LMULMAX1-NEXT:    add a0, s1, a0
+; LMULMAX1-NEXT:    addi a0, a0, 368
+; LMULMAX1-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a1, a0, 1
+; LMULMAX1-NEXT:    add a0, a1, a0
+; LMULMAX1-NEXT:    add a0, s1, a0
+; LMULMAX1-NEXT:    addi a0, a0, 368
+; LMULMAX1-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 1
+; LMULMAX1-NEXT:    add a0, s1, a0
+; LMULMAX1-NEXT:    addi a0, a0, 368
+; LMULMAX1-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    add a0, s1, a0
+; LMULMAX1-NEXT:    addi a0, a0, 368
+; LMULMAX1-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    addi a0, s1, 368
+; LMULMAX1-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    addi sp, s0, -400
+; LMULMAX1-NEXT:    ld ra, 392(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld s0, 384(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    ld s1, 376(sp) # 8-byte Folded Reload
+; LMULMAX1-NEXT:    addi sp, sp, 400
; LMULMAX1-NEXT:    ret
%t = call <32 x i32> @ext3(<32 x i32> %z, <32 x i32> %y, <32 x i32> %x, i32 %w, i32 42)
ret <32 x i32> %t
@@ -982,6 +1989,18 @@
;
; LMULMAX2-LABEL: split_vector_args:
; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi sp, sp, -16
+; LMULMAX2-NEXT:    .cfi_def_cfa_offset 16
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    slli a1, a1, 1
+; LMULMAX2-NEXT:    sub sp, sp, a1
+; LMULMAX2-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; LMULMAX2-NEXT:    csrr a1, vlenb
+; LMULMAX2-NEXT:    add a1, sp, a1
+; LMULMAX2-NEXT:    addi a1, a1, 16
+; LMULMAX2-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX2-NEXT:    addi a1, sp, 16
+; LMULMAX2-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
; LMULMAX2-NEXT:    addi a1, a0, 64
; LMULMAX2-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT:    vle32.v v10, (a0)
@@ -992,17 +2011,39 @@
; LMULMAX2-NEXT:    vadd.vv v10, v16, v10
; LMULMAX2-NEXT:    vadd.vv v12, v18, v12
; LMULMAX2-NEXT:    vadd.vv v14, v20, v24
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    add a0, sp, a0
+; LMULMAX2-NEXT:    addi a0, a0, 16
+; LMULMAX2-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    addi a0, sp, 16
+; LMULMAX2-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX2-NEXT:    csrr a0, vlenb
+; LMULMAX2-NEXT:    slli a0, a0, 1
+; LMULMAX2-NEXT:    add sp, sp, a0
+; LMULMAX2-NEXT:    addi sp, sp, 16
; LMULMAX2-NEXT:    ret
;
; LMULMAX1-LABEL: split_vector_args:
; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi sp, sp, -16
+; LMULMAX1-NEXT:    .cfi_def_cfa_offset 16
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    slli a1, a1, 1
+; LMULMAX1-NEXT:    sub sp, sp, a1
+; LMULMAX1-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; LMULMAX1-NEXT:    csrr a1, vlenb
+; LMULMAX1-NEXT:    add a1, sp, a1
+; LMULMAX1-NEXT:    addi a1, a1, 16
+; LMULMAX1-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; LMULMAX1-NEXT:    addi a1, sp, 16
+; LMULMAX1-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
; LMULMAX1-NEXT:    addi a1, a0, 64
; LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT:    vle32.v v24, (a1)
+; LMULMAX1-NEXT:    vle32.v v0, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 48
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
+; LMULMAX1-NEXT:    vle32.v v24, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 32
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
+; LMULMAX1-NEXT:    vle32.v v25, (a1)
; LMULMAX1-NEXT:    addi a1, a0, 16
; LMULMAX1-NEXT:    vle32.v v12, (a1)
; LMULMAX1-NEXT:    vle32.v v11, (a0)
@@ -1011,9 +2052,19 @@
; LMULMAX1-NEXT:    vadd.vv v10, v15, v23
; LMULMAX1-NEXT:    vadd.vv v11, v16, v11
; LMULMAX1-NEXT:    vadd.vv v12, v17, v12
-; LMULMAX1-NEXT:    vadd.vv v13, v18, v26
-; LMULMAX1-NEXT:    vadd.vv v14, v19, v25
-; LMULMAX1-NEXT:    vadd.vv v15, v20, v24
+; LMULMAX1-NEXT:    vadd.vv v13, v18, v25
+; LMULMAX1-NEXT:    vadd.vv v14, v19, v24
+; LMULMAX1-NEXT:    vadd.vv v15, v20, v0
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    add a0, sp, a0
+; LMULMAX1-NEXT:    addi a0, a0, 16
+; LMULMAX1-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    addi a0, sp, 16
+; LMULMAX1-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; LMULMAX1-NEXT:    csrr a0, vlenb
+; LMULMAX1-NEXT:    slli a0, a0, 1
+; LMULMAX1-NEXT:    add sp, sp, a0
+; LMULMAX1-NEXT:    addi sp, sp, 16
; LMULMAX1-NEXT:    ret
%v0 = add <32 x i32> %y, %z
ret <32 x i32> %v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
; RUN:   -verify-machineinstrs < %s | FileCheck %s
declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32)
@@ -503,6 +503,52 @@
define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v15f64:
; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 3
+; CHECK-NEXT:    sub a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 6
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 2
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 1
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a1, %hi(.LCPI22_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI22_0)(a1)
@@ -518,6 +564,50 @@
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 6
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 2
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
%v = call <15 x double> @llvm.vp.ceil.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
ret <15 x double> %v
@@ -549,6 +639,52 @@
define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vp_ceil_v16f64:
; CHECK:       # %bb.0:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 3
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 3
+; CHECK-NEXT:    sub a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 6
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 2
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 1
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a1, sp, 16
+; CHECK-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v16, v0
; CHECK-NEXT:    lui a1, %hi(.LCPI24_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI24_0)(a1)
@@ -564,6 +700,50 @@
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 6
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 2
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
%v = call <16 x double> @llvm.vp.ceil.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
ret <16 x double> %v
@@ -598,9 +778,63 @@
; CHECK-NEXT:    addi sp, sp, -16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    csrr a1, vlenb
-; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    li a2, 26
+; CHECK-NEXT:    mul a1, a1, a2
; CHECK-NEXT:    sub sp, sp, a1
-; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 24
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 23
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 22
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 21
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 20
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 19
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    li a2, 18
+; CHECK-NEXT:    mul a1, a1, a2
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a2, a1, 4
+; CHECK-NEXT:    add a1, a2, a1
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 4
+; CHECK-NEXT:    add a1, sp, a1
+; CHECK-NEXT:    addi a1, a1, 16
+; CHECK-NEXT:    vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT:    vmv1r.v v25, v0
; CHECK-NEXT:    csrr a1, vlenb
; CHECK-NEXT:    slli a1, a1, 3
@@ -657,7 +891,61 @@
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 24
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 23
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 22
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 21
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 20
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 19
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 18
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 4
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
; CHECK-NEXT:    slli a0, a0, 4
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 26
+; CHECK-NEXT:    mul a0, a0, a1
; CHECK-NEXT:    add sp, sp, a0
; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
@@ -674,6 +962,52 @@
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    li a1, 16
; CHECK-NEXT:  .LBB27_2:
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 3
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 3
+; CHECK-NEXT:    sub a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    li a3, 6
+; CHECK-NEXT:    mul a2, a2, a3
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 2
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a3, a2, 1
+; CHECK-NEXT:    add a2, a3, a2
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT:    lui a2, %hi(.LCPI27_0)
; CHECK-NEXT:    fld fa5, %lo(.LCPI27_0)(a2)
; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
@@ -698,6 +1032,50 @@
; CHECK-NEXT:    vfcvt.f.x.v v24, v24, v0.t
; CHECK-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
; CHECK-NEXT:    vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 3
+; CHECK-NEXT:    sub a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    li a1, 6
+; CHECK-NEXT:    mul a0, a0, a1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 2
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 2
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a1, a0, 1
+; CHECK-NEXT:    add a0, a1, a0
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 3
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
; CHECK-NEXT:    ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -2103,6 +2103,52 @@
define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v15i64:
; RV32:       # %bb.0:
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 3
+; RV32-NEXT:    sub a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 6
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a2, a1, 2
+; RV32-NEXT:    add a1, a2, a1
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs1r.v v27, (a1) # Unknown-size
Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2154,6 +2200,50 @@ ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64: @@ -2210,6 +2300,52 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v15i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: 
slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -2261,6 +2397,50 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_v15i64_unmasked: @@ -2321,6 +2501,52 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v16i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -2261,6 +2397,50 @@
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v15i64_unmasked:
@@ -2321,6 +2501,52 @@
define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v16i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -2372,6 +2598,50 @@
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v16i64:
@@ -2428,6 +2698,52 @@
define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v16i64_unmasked:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -2479,6 +2795,50 @@
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_v16i64_unmasked:
@@ -2542,10 +2902,16 @@
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 56
+; RV32-NEXT: li a2, 58
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 5
; RV32-NEXT: add a1, sp, a1
@@ -2799,6 +3165,12 @@
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 56
; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 58
+; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
@@ -2808,9 +3180,15 @@
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: li a2, 18
+; RV64-NEXT: mul a1, a1, a2
; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
; RV64-NEXT: add a1, sp, a1
@@ -2910,6 +3288,12 @@
; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
@@ -2920,36 +3304,126 @@
define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_v32i64_unmasked:
; RV32: # %bb.0:
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: vmv8r.v v0, v16
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: bltu a0, a1, .LBB35_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: .LBB35_2:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a1, a1, a3
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
-; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 2
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 4
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 8
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsrl.vi v16, v8, 16
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsrl.vx v16, v8, a1
-; RV32-NEXT: vor.vv v8, v8, v16
-; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v16, -1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 54
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 53
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 52
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 51
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 50
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 49
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 48
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 47
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 46
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 45
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 44
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 43
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 42
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 41
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: vmv8r.v v0, v16
+; RV32-NEXT: mv a2, a0
+; RV32-NEXT: bltu a0, a1, .LBB35_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: .LBB35_2:
+; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 2
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 4
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 8
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsrl.vi v16, v8, 16
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: li a1, 32
+; RV32-NEXT: vsrl.vx v16, v8, a1
+; RV32-NEXT: vor.vv v8, v8, v16
+; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v16, -1
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 5
; RV32-NEXT: add a3, sp, a3
@@ -3062,8 +3536,98 @@
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 54
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 53
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 52
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 51
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 50
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 49
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 47
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 46
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 45
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 44
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 43
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 42
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 41
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
@@ -3076,6 +3640,52 @@
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 16
; RV64-NEXT: .LBB35_2:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v24, v8, 1
; RV64-NEXT: vor.vv v8, v8, v24
@@ -3150,6 +3760,50 @@
; RV64-NEXT: vand.vx v16, v16, a4
; RV64-NEXT: vmul.vx v16, v16, a5
; RV64-NEXT: vsrl.vx v16, v16, a6
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%head = insertelement <32 x i1> poison, i1 true, i32 0
%m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
@@ -5224,6 +5878,52 @@
define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v15i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -5275,6 +5975,50 @@
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_ctlz_zero_undef_v15i64:
@@ -5331,6 +6075,52 @@
define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: vor.vv v8, v8, v16
@@ -5382,9 +6172,53 @@
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
; RV64: # %bb.0:
; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV64-NEXT: vsrl.vi v16, v8, 1
@@ -5440,6 +6274,52 @@
define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_ctlz_zero_undef_v16i64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
@@ -5491,6 +6371,50 @@
; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
; RV32-NEXT: li a0, 56
; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_ctlz_zero_undef_v16i64_unmasked: @@ -5659,10 +6673,16 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 58 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 @@ -5916,6 +6936,12 @@ ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 58 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -5925,9 +6951,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 18 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 @@ -6027,6 +7059,12 @@ ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -6037,6 +7075,103 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v32i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 54 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 53 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; 
RV32-NEXT: li a2, 52 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 51 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 50 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 49 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 47 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 46 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 44 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 43 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 41 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 16 ; RV32-NEXT: vmv8r.v v0, v16 ; RV32-NEXT: mv a2, a0 @@ -6044,13 +7179,6 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB71_2: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v16 @@ -6179,8 +7307,98 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 54 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 53 +; RV32-NEXT: mul a0, a0, a1 +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 52 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 51 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 50 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 49 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 47 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 46 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 45 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 44 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 43 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 41 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -6193,6 +7411,52 @@ ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB71_2: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; 
RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsrl.vi v24, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v24 @@ -6267,6 +7531,50 @@ ; RV64-NEXT: vand.vx v16, v16, a4 ; RV64-NEXT: vmul.vx v16, v16, a5 ; RV64-NEXT: vsrl.vx v16, v16, a6 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1541,6 +1541,52 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT: lui a1, 349525
@@ -1576,6 +1622,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v15i64:
@@ -1618,6 +1708,52 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v15i64_unmasked:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 1
 ; RV32-NEXT: lui a1, 349525
@@ -1653,6 +1789,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v15i64_unmasked:
@@ -1699,6 +1879,52 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v16i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT: lui a1, 349525
@@ -1734,6 +1960,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v16i64:
@@ -1776,6 +2046,52 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v16i64_unmasked:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v16, v8, 1
 ; RV32-NEXT: lui a1, 349525
@@ -1811,6 +2127,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_ctpop_v16i64_unmasked:
@@ -1860,10 +2220,16 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: li a2, 56
+; RV32-NEXT: li a2, 58
 ; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a2, 48
 ; RV32-NEXT: mul a1, a1, a2
@@ -2064,6 +2430,12 @@
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: li a1, 56
 ; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 58
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -2073,9 +2445,15 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: li a2, 18
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a1, sp, a1
@@ -2148,6 +2526,12 @@
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -2158,6 +2542,102 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v32i64_unmasked:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 48
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 46
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 45
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 44
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 43
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 42
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 41
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 39
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 38
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 37
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 36
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 35
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 34
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 5
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a1, 349525
 ; RV32-NEXT: addi a2, a1, 1365
 ; RV32-NEXT: li a1, 32
@@ -2169,12 +2649,6 @@
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a2, 16
 ; RV32-NEXT: .LBB35_2:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
-; RV32-NEXT: sub sp, sp, a3
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsrl.vi v24, v8, 1
 ; RV32-NEXT: vand.vv v24, v24, v0
@@ -2258,7 +2732,97 @@
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 46
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 45
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 44
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 43
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 42
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 41
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 40
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 39
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 38
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 37
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 36
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 35
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 34
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 5
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 5
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -2271,6 +2835,52 @@
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: li a1, 16
 ; RV64-NEXT: .LBB35_2:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsrl.vi v24, v8, 1
 ; RV64-NEXT: lui a1, 349525
@@ -2318,6 +2928,50 @@
 ; RV64-NEXT: vand.vx v16, v16, a3
 ; RV64-NEXT: vmul.vx v16, v16, a4
 ; RV64-NEXT: vsrl.vx v16, v16, a5
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1767,6 +1767,52 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v15i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -1809,6 +1855,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v15i64:
@@ -1855,6 +1945,52 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v15i64_unmasked:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1
@@ -1897,6 +2033,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v15i64_unmasked:
@@ -1947,6 +2127,52 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v16i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -1989,6 +2215,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v16i64:
@@ -2035,6 +2305,52 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v16i64_unmasked:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1
@@ -2077,6 +2393,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vp_cttz_v16i64_unmasked:
@@ -2130,9 +2490,15 @@
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
 ; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: li a2, 66
+; RV32-NEXT: mul a1, a1, a2
 ; RV32-NEXT: sub sp, sp, a1
-; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb
+; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 66 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 6
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v24, v0
 ; RV32-NEXT: csrr a1, vlenb
 ; RV32-NEXT: li a2, 40
@@ -2387,6 +2753,12 @@
 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: slli a0, a0, 6
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 66
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -2396,9 +2768,15 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a1, vlenb
-; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: li a2, 18
+; RV64-NEXT: mul a1, a1, a2
 ; RV64-NEXT: sub sp, sp, a1
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 4
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
 ; RV64-NEXT: add a1, sp, a1
@@ -2478,6 +2856,12 @@
 ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -2488,37 +2872,127 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v32i64_unmasked:
 ; RV32: # %bb.0:
-; RV32-NEXT: li a2, 16
-; RV32-NEXT: vmv8r.v v0, v16
-; RV32-NEXT: mv a1, a0
-; RV32-NEXT: bltu a0, a2, .LBB35_2
-; RV32-NEXT: # %bb.1:
-; RV32-NEXT: li a1, 16
-; RV32-NEXT: .LBB35_2:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: li a3, 40
-; RV32-NEXT: mul a2, a2, a3
-; RV32-NEXT: sub sp, sp, a2
-; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
-; RV32-NEXT: li a2, 32
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.i v16, -1
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT: vxor.vv v16, v8, v16
-; RV32-NEXT: li a3, 1
-; RV32-NEXT: vsub.vx v8, v8, a3
-; RV32-NEXT: vand.vv v8, v16, v8
-; RV32-NEXT: vsrl.vi v16, v8, 1
-; RV32-NEXT: lui a4, 349525
-; RV32-NEXT: addi a4, a4, 1365
-; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 56
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 54
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 53
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 52
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 51
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 50
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 49
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 48
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 47
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 46
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 45
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 44
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 43
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 42
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 41
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: li a2, 16
+; RV32-NEXT: vmv8r.v v0, v16
+; RV32-NEXT: mv a1, a0
+; RV32-NEXT: bltu a0, a2, .LBB35_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: li a1, 16
+; RV32-NEXT: .LBB35_2:
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
+; RV32-NEXT: vmv.v.i v16, -1
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 5
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT: vxor.vv v16, v8, v16
+; RV32-NEXT: li a3, 1
+; RV32-NEXT: vsub.vx v8, v8, a3
+; RV32-NEXT: vand.vv v8, v16, v8
+; RV32-NEXT: vsrl.vi v16, v8, 1
+; RV32-NEXT: lui a4, 349525
+; RV32-NEXT: addi a4, a4, 1365
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT: vmv.v.x v24, a4
 ; RV32-NEXT: csrr a4, vlenb
 ; RV32-NEXT: li a5, 24
@@ -2610,8 +3084,98 @@
 ; RV32-NEXT: addi a0, a0, 16
 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 54
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 53
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 52
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 51
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 50
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 49
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 48
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 47
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 46
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 45
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 44
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 43
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 42
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 41
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
 ; RV32-NEXT: li a1, 40
 ; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: mul a0, a0, a1
 ; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
@@ -2624,6 +3188,52 @@
 ; RV64-NEXT: # %bb.1:
 ; RV64-NEXT: li a1, 16
 ; RV64-NEXT: .LBB35_2:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: li a2, 1
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsub.vx v24, v8, a2
@@ -2678,6 +3288,50 @@
 ; RV64-NEXT: vand.vx v16, v16, a4
 ; RV64-NEXT: vmul.vx v16, v16, a5
 ; RV64-NEXT: vsrl.vx v16, v16, a6
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
@@ -4416,6 +5070,52 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v15i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 1
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t
@@ -4458,6 +5158,50 @@
 ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT: li a0, 56
 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64: @@ -4504,30 +5248,76 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: li a1, 1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v16, v8, a1 -; RV32-NEXT: li a1, 32 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.i v24, -1 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vxor.vv v8, v8, v24 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vsrl.vi v16, v8, 1 -; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: addi a2, a2, 1365 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v24, a2 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsub.vv v8, v8, v16 -; RV32-NEXT: lui a2, 209715 -; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32-NEXT: vmv.v.x v16, a2 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v16 -; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vx v16, v8, a1 +; RV32-NEXT: li a1, 32 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.i v24, -1 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vxor.vv v8, v8, v24 +; RV32-NEXT: vand.vv v8, v8, v16 
+; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: lui a2, 349525 +; RV32-NEXT: addi a2, a2, 1365 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v24, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsub.vv v8, v8, v16 +; RV32-NEXT: lui a2, 209715 +; RV32-NEXT: addi a2, a2, 819 +; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; RV32-NEXT: vmv.v.x v16, a2 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsrl.vi v16, v8, 4 @@ -4546,6 +5336,50 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked: @@ -4594,6 +5428,52 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; 
RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t @@ -4636,6 +5516,50 @@ ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64: @@ -4682,6 +5606,52 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # 
Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1 @@ -4724,6 +5694,50 @@ ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vp_cttz_zero_undef_v16i64_unmasked: @@ -4775,9 +5789,15 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 66 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc2, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 66 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 40 @@ -5032,6 +6052,12 @@ ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 6 +; RV32-NEXT: add 
a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 66 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -5041,9 +6067,15 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 18 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 @@ -5123,6 +6155,12 @@ ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -5133,6 +6171,103 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v32i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 56 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 54 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 53 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 52 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 51 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 50 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 49 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 47 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; 
RV32-NEXT: li a2, 46 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 45 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 44 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 43 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 42 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 41 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a2, 16 ; RV32-NEXT: vmv8r.v v0, v16 ; RV32-NEXT: mv a1, a0 @@ -5140,13 +6275,6 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB71_2: -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, -1 @@ -5255,8 +6383,98 @@ ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 54 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 53 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 52 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 51 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 50 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 49 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 47 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: 
add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 46 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 45 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 44 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 43 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 42 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 41 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 56 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -5269,6 +6487,52 @@ ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB71_2: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: li a2, 1 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsub.vx v24, v8, a2 
@@ -5323,6 +6587,50 @@
 ; RV64-NEXT: vand.vx v16, v16, a4
 ; RV64-NEXT: vmul.vx v16, v16, a5
 ; RV64-NEXT: vsrl.vx v16, v16, a6
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32)
@@ -503,6 +503,52 @@
 define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: 
vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) @@ -518,6 +564,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.floor.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -549,6 +639,52 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 
+; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) @@ -564,6 +700,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.floor.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -598,9 +778,63 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 26 +; CHECK-NEXT: mul a1, a1, a2 ; 
CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 23 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 22 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 21 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 20 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -657,7 +891,61 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -674,6 +962,52 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -698,6 +1032,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -255,9 +255,98 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: slli a0, a0, 5 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 30 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 29 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 28 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 27 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 26 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 25 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 23 +; RV32-V128-NEXT: mul a0, a0, a1 
+; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 22 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 21 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 20 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 19 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 18 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a1, a0, 4 +; RV32-V128-NEXT: add a0, a1, a0 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV32-V128-NEXT: li a1, 32 @@ -296,7 +385,96 @@ ; RV32-V128-NEXT: vmv8r.v v8, v0 ; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 30 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 29 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 28 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 27 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 26 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 25 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: 
csrr a0, vlenb +; RV32-V128-NEXT: li a1, 23 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 22 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 21 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 20 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 19 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 18 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a1, a0, 4 +; RV32-V128-NEXT: add a0, a1, a0 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 5 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -306,9 +484,98 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: slli a0, a0, 5 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 30 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 29 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 28 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 27 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 26 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, 
a0, 16 +; RV64-V128-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 25 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 23 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 22 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 21 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 20 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 19 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 18 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a1, a0, 4 +; RV64-V128-NEXT: add a0, a1, a0 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, %hi(.LCPI10_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI10_0) ; RV64-V128-NEXT: li a1, 32 @@ -347,7 +614,96 @@ ; RV64-V128-NEXT: vmv8r.v v8, v0 ; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 30 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 29 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 28 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 27 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 26 +; RV64-V128-NEXT: mul 
a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 25
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 24
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 23
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 22
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 21
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 20
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 19
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: li a1, 18
+; RV64-V128-NEXT: mul a0, a0, a1
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: slli a1, a0, 4
+; RV64-V128-NEXT: add a0, a1, a0
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
 ; RV64-V128-NEXT: slli a0, a0, 4
+; RV64-V128-NEXT: add a0, sp, a0
+; RV64-V128-NEXT: addi a0, a0, 16
+; RV64-V128-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-V128-NEXT: csrr a0, vlenb
+; RV64-V128-NEXT: slli a0, a0, 5
 ; RV64-V128-NEXT: add sp, sp, a0
 ; RV64-V128-NEXT: addi sp, sp, 16
 ; RV64-V128-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 declare <2 x float> @llvm.vp.fpext.v2f32.v2f16(<2 x half>, <2 x i1>, i32)
@@ -94,6 +94,58 @@
 define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vfpext_v32f32_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v0, 2
@@ -114,6 +166,56 @@
 ; CHECK-NEXT: vmv1r.v v0, v1
 ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float> %a, <32 x i1> %m, i32 %vl)
 ret <32 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll
@@ -316,6 +316,14 @@
 define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfptosi_v32i64_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v0, 2
@@ -333,6 +341,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <32 x i64> @llvm.vp.fptosi.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
 ret <32 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll
@@ -316,6 +316,14 @@
 define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfptoui_v32i64_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v0, 2
@@ -333,6 +341,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
 ret <32 x i64> %v
diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.fptrunc.v2f16.v2f32(<2 x float>, <2 x i1>, i32) @@ -94,6 +94,52 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfptrunc_v32f32_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v28, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -115,6 +161,50 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 
+; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x float> @llvm.vp.fptrunc.v32f64.v32f32(<32 x double> %a, <32 x i1> %m, i32 %vl) ret <32 x float> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fshr-fshl-vp.ll @@ -799,9 +799,56 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 14 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 12 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 11 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 10 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; 
RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -822,7 +869,54 @@ ; RV32-NEXT: vsrl.vv v16, v24, v16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -832,9 +926,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: slli a2, a2, 4 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; 
RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -851,7 +992,54 @@ ; RV64-NEXT: vsrl.vv v16, v24, v16, v0.t ; RV64-NEXT: vor.vv v8, v8, v16, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -866,9 +1054,56 @@ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 4 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 14 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 13 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li 
a3, 12 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 11 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 10 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -889,7 +1124,54 @@ ; RV32-NEXT: vsll.vv v16, v24, v16, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 4 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -899,9 +1181,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: slli a2, a2, 4 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # 
Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: addi a2, sp, 16 ; RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -918,7 +1247,54 @@ ; RV64-NEXT: vsll.vv v16, v24, v16, v0.t ; RV64-NEXT: vor.vv v8, v16, v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll ---
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -414,9 +414,98 @@ ; RV32-V128-NEXT: addi sp, sp, -16 ; RV32-V128-NEXT: .cfi_def_cfa_offset 16 ; RV32-V128-NEXT: csrr a0, vlenb -; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: slli a0, a0, 5 ; RV32-V128-NEXT: sub sp, sp, a0 -; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 30 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 29 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 28 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 27 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 26 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 25 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 23 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 22 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 21 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 20 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 19 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 18 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi 
a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a1, a0, 4 +; RV32-V128-NEXT: add a0, a1, a0 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-V128-NEXT: lui a0, %hi(.LCPI17_0) ; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV32-V128-NEXT: li a1, 32 @@ -455,7 +544,96 @@ ; RV32-V128-NEXT: vmv8r.v v8, v0 ; RV32-V128-NEXT: vmv8r.v v16, v24 ; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 30 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 29 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 28 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 27 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 26 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 25 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 24 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 23 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 22 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 21 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 20 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 19 +; RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: li a1, 18 +; 
RV32-V128-NEXT: mul a0, a0, a1 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a1, a0, 4 +; RV32-V128-NEXT: add a0, a1, a0 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb ; RV32-V128-NEXT: slli a0, a0, 4 +; RV32-V128-NEXT: add a0, sp, a0 +; RV32-V128-NEXT: addi a0, a0, 16 +; RV32-V128-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-V128-NEXT: csrr a0, vlenb +; RV32-V128-NEXT: slli a0, a0, 5 ; RV32-V128-NEXT: add sp, sp, a0 ; RV32-V128-NEXT: addi sp, sp, 16 ; RV32-V128-NEXT: ret @@ -465,9 +643,98 @@ ; RV64-V128-NEXT: addi sp, sp, -16 ; RV64-V128-NEXT: .cfi_def_cfa_offset 16 ; RV64-V128-NEXT: csrr a0, vlenb -; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: slli a0, a0, 5 ; RV64-V128-NEXT: sub sp, sp, a0 -; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-V128-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 30 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 29 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 28 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 27 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 26 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 25 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 23 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 22 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 21 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v26, (a0) # 
Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 20 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 19 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 18 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a1, a0, 4 +; RV64-V128-NEXT: add a0, a1, a0 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-V128-NEXT: lui a0, %hi(.LCPI17_0) ; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI17_0) ; RV64-V128-NEXT: li a1, 32 @@ -506,7 +773,96 @@ ; RV64-V128-NEXT: vmv8r.v v8, v0 ; RV64-V128-NEXT: vmv8r.v v16, v24 ; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 30 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 29 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 28 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 27 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 26 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 25 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 24 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 23 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 22 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 21 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, 
a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 20 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 19 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: li a1, 18 +; RV64-V128-NEXT: mul a0, a0, a1 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a1, a0, 4 +; RV64-V128-NEXT: add a0, a1, a0 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb ; RV64-V128-NEXT: slli a0, a0, 4 +; RV64-V128-NEXT: add a0, sp, a0 +; RV64-V128-NEXT: addi a0, a0, 16 +; RV64-V128-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-V128-NEXT: csrr a0, vlenb +; RV64-V128-NEXT: slli a0, a0, 5 ; RV64-V128-NEXT: add sp, sp, a0 ; RV64-V128-NEXT: addi sp, sp, 16 ; RV64-V128-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half>, <2 x i1>, i32) @@ -503,6 +503,52 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; 
CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) @@ -518,6 +564,50 @@ ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -549,6 +639,52 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb 
+; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) @@ -564,6 +700,50 @@ ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -595,6 +775,64 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, 
sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: li a2, 16 @@ -636,6 +874,62 @@ ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded 
Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl)
 ret <32 x double> %v
@@ -650,6 +944,52 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 16
 ; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
@@ -674,6 +1014,50 @@
 ; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll
@@ -121,6 +121,18 @@
 define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_v64f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vslidedown.vi v24, v0, 4
@@ -141,6 +153,16 @@
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
 ; CHECK-NEXT: vfmv.f.s fa0, v25
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %r = call reassoc float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
 ret float %r
@@ -149,6 +171,18 @@
 define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_v64f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vslidedown.vi v24, v0, 4
@@ -169,6 +203,16 @@
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
 ; CHECK-NEXT: vfmv.f.s fa0, v25
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %r = call float @llvm.vp.reduce.fadd.v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 %evl)
 ret float %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
 declare i8 @llvm.vp.reduce.add.v2i8(i8, <2 x i8>, <2 x i1>, i32)
@@ -845,6 +845,18 @@
 define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_xor_v64i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
 ; CHECK-NEXT: li a3, 32
 ; CHECK-NEXT: vslidedown.vi v24, v0, 4
@@ -865,6 +877,16 @@
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vredxor.vs v25, v16, v25, v0.t
 ; CHECK-NEXT: vmv.x.s a0, v25
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vl1r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add sp, sp, a1
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %r = call i32 @llvm.vp.reduce.xor.v64i32(i32 %s, <64 x i32> %v, <64 x i1> %m, i32 %evl)
 ret i32 %r
@@ -1417,47 +1439,18 @@
 declare i8 @llvm.vp.reduce.mul.v1i8(i8, <1 x i8>, <1 x i1>, i32)
 define i8 @vpreduce_mul_v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 zeroext %evl) {
-; RV32-LABEL: vpreduce_mul_v1i8:
-; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v9, a1
-; RV32-NEXT: vmsne.vi v9, v9, 0
-; RV32-NEXT: vmand.mm v0, v9, v0
-; RV32-NEXT: vmv.v.i v9, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vpreduce_mul_v1i8:
-; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v9, a1
-; RV64-NEXT: vmsne.vi v9, v9, 0
-; RV64-NEXT: vmand.mm v0, v9, v0
-; RV64-NEXT: vmv.v.i v9, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
-; RV64-NEXT: ret
+; CHECK-LABEL: vpreduce_mul_v1i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vmv.v.x v9, a1
+; CHECK-NEXT: vmsne.vi v9, v9, 0
+; CHECK-NEXT: vmand.mm v0, v9, v0
+; CHECK-NEXT: vmv.v.i v9, 1
+; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: vmv.x.s a1, v8
+; CHECK-NEXT: mul a0, a1, a0
+; CHECK-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v1i8(i8 %s, <1 x i8> %v, <1 x i1> %m, i32 %evl)
 ret i8 %r
 }
@@ -1467,11 +1460,6 @@
 define signext i8 @vpreduce_mul_v2i8(i8 signext %s, <2 x i8> %v, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_mul_v2i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a0
 ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; RV32-NEXT: vid.v v9
 ; RV32-NEXT: vmsltu.vx v9, v9, a1
@@ -1481,22 +1469,14 @@
 ; RV32-NEXT: vmerge.vvm v8, v9, v8, v0
 ; RV32-NEXT: vrgather.vi v9, v8, 1
 ; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: mul a0, a1, a0
 ; RV32-NEXT: slli a0, a0, 24
 ; RV32-NEXT: srai a0, a0, 24
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_mul_v2i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: mv a2, a0
 ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
 ; RV64-NEXT: vid.v v9
 ; RV64-NEXT: vmsltu.vx v9, v9, a1
@@ -1506,13 +1486,10 @@
 ; RV64-NEXT: vmerge.vvm v8, v9, v8, v0
 ; RV64-NEXT: vrgather.vi v9, v8, 1
 ; RV64-NEXT: vmul.vv v8, v8, v9
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: mul a0, a1, a0
 ; RV64-NEXT: slli a0, a0, 56
 ; RV64-NEXT: srai a0, a0, 56
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v2i8(i8 %s, <2 x i8> %v, <2 x i1> %m, i32 %evl)
 ret i8 %r
@@ -1523,11 +1500,6 @@
 define signext i8 @vpreduce_mul_v4i8(i8 signext %s, <4 x i8> %v, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_mul_v4i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a0
 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT: vid.v v9
 ; RV32-NEXT: vmsltu.vx v9, v9, a1
@@ -1539,22 +1511,14 @@
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: vrgather.vi v9, v8, 1
 ; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: mul a0, a1, a0
 ; RV32-NEXT: slli a0, a0, 24
 ; RV32-NEXT: srai a0, a0, 24
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_mul_v4i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: mv a2, a0
 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT: vid.v v9
 ; RV64-NEXT: vmsltu.vx v9, v9, a1
@@ -1566,13 +1530,10 @@
 ; RV64-NEXT: vmul.vv v8, v8, v9
 ; RV64-NEXT: vrgather.vi v9, v8, 1
 ; RV64-NEXT: vmul.vv v8, v8, v9
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: mul a0, a1, a0
 ; RV64-NEXT: slli a0, a0, 56
 ; RV64-NEXT: srai a0, a0, 56
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v4i8(i8 %s, <4 x i8> %v, <4 x i1> %m, i32 %evl)
 ret i8 %r
@@ -1583,11 +1544,6 @@
 define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_mul_v8i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a0
 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT: vid.v v10
 ; RV32-NEXT: vmsltu.vx v9, v10, a1
@@ -1601,22 +1557,14 @@
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: vrgather.vi v9, v8, 1
 ; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: mul a0, a1, a0
 ; RV32-NEXT: slli a0, a0, 24
 ; RV32-NEXT: srai a0, a0, 24
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_mul_v8i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: mv a2, a0
 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
 ; RV64-NEXT: vid.v v10
 ; RV64-NEXT: vmsltu.vx v9, v10, a1
@@ -1630,13 +1578,10 @@
 ; RV64-NEXT: vmul.vv v8, v8, v9
 ; RV64-NEXT: vrgather.vi v9, v8, 1
 ; RV64-NEXT: vmul.vv v8, v8, v9
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: mul a0, a1, a0
 ; RV64-NEXT: slli a0, a0, 56
 ; RV64-NEXT: srai a0, a0, 56
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v8i8(i8 %s, <8 x i8> %v, <8 x i1> %m, i32 %evl)
 ret i8 %r
@@ -1647,11 +1592,6 @@
 define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_mul_v16i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a0
 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT: vid.v v12
 ; RV32-NEXT: vmsltu.vx v9, v12, a1
@@ -1667,22 +1607,14 @@
 ; RV32-NEXT: vmul.vv v8, v8, v9
 ; RV32-NEXT: vrgather.vi v9, v8, 1
 ; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: mul a0, a1, a0
 ; RV32-NEXT: slli a0, a0, 24
 ; RV32-NEXT: srai a0, a0, 24
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_mul_v16i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: mv a2, a0
 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma
 ; RV64-NEXT: vid.v v12
 ; RV64-NEXT: vmsltu.vx v9, v12, a1
@@ -1698,13 +1630,10 @@
 ; RV64-NEXT: vmul.vv v8, v8, v9
 ; RV64-NEXT: vrgather.vi v9, v8, 1
 ; RV64-NEXT: vmul.vv v8, v8, v9
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: mul a0, a1, a0
 ; RV64-NEXT: slli a0, a0, 56
 ; RV64-NEXT: srai a0, a0, 56
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v16i8(i8 %s, <16 x i8> %v, <16 x i1> %m, i32 %evl)
 ret i8 %r
@@ -1715,13 +1644,8 @@
 define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_mul_v32i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
-; RV32-NEXT: mv a2, a0
-; RV32-NEXT: li a0, 32
-; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV32-NEXT: li a2, 32
+; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV32-NEXT: vid.v v16
 ; RV32-NEXT: vmsltu.vx v10, v16, a1
 ; RV32-NEXT: vmand.mm v0, v10, v0
@@ -1738,24 +1662,16 @@
 ; RV32-NEXT: vmul.vv v8, v8, v10
 ; RV32-NEXT: vrgather.vi v10, v8, 1
 ; RV32-NEXT: vmul.vv v8, v8, v10
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: mul a0, a1, a0
 ; RV32-NEXT: slli a0, a0, 24
 ; RV32-NEXT: srai a0, a0, 24
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_mul_v32i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
-; RV64-NEXT: mv a2, a0
-; RV64-NEXT: li a0, 32
-; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma
+; RV64-NEXT: li a2, 32
+; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; RV64-NEXT: vid.v v16
 ; RV64-NEXT: vmsltu.vx v10, v16, a1
 ; RV64-NEXT: vmand.mm v0, v10, v0
@@ -1772,13 +1688,10 @@
 ; RV64-NEXT: vmul.vv v8, v8, v10
 ; RV64-NEXT: vrgather.vi v10, v8, 1
 ; RV64-NEXT: vmul.vv v8, v8, v10
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: mul a0, a1, a0
 ; RV64-NEXT: slli a0, a0, 56
 ; RV64-NEXT: srai a0, a0, 56
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v32i8(i8 %s, <32 x i8> %v, <32 x i1> %m, i32 %evl)
 ret i8 %r
@@ -1789,23 +1702,18 @@
 define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_mul_v64i8:
 ; RV32: # %bb.0:
-; RV32-NEXT: addi sp, sp, -16
-; RV32-NEXT: .cfi_def_cfa_offset 16
-; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32-NEXT: .cfi_offset ra, -4
 ; RV32-NEXT: lui a2, %hi(.LCPI72_0)
 ; RV32-NEXT: addi a2, a2, %lo(.LCPI72_0)
 ; RV32-NEXT: li a3, 32
 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma
 ; RV32-NEXT: vle32.v v16, (a2)
-; RV32-NEXT: mv a2, a0
 ; RV32-NEXT: vmsltu.vx v12, v16, a1
 ; RV32-NEXT: vid.v v16
 ; RV32-NEXT: vmsltu.vx v13, v16, a1
 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; RV32-NEXT: vslideup.vi v13, v12, 4
-; RV32-NEXT: li a0, 64
-; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV32-NEXT: li a1, 64
+; RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; RV32-NEXT: vmand.mm v0, v13, v0
 ; RV32-NEXT: vmv.v.i v12, 1
 ; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
@@ -1821,34 +1729,26 @@
 ; RV32-NEXT: vmul.vv v8, v8, v12
 ; RV32-NEXT: vrgather.vi v12, v8, 1
 ; RV32-NEXT: vmul.vv v8, v8, v12
-; RV32-NEXT: vmv.x.s a0, v8
-; RV32-NEXT: mv a1, a2
-; RV32-NEXT: call __mulsi3@plt
+; RV32-NEXT: vmv.x.s a1, v8
+; RV32-NEXT: mul a0, a1, a0
 ; RV32-NEXT: slli a0, a0, 24
 ; RV32-NEXT: srai a0, a0, 24
-; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_mul_v64i8:
 ; RV64: # %bb.0:
-; RV64-NEXT: addi sp, sp, -16
-; RV64-NEXT: .cfi_def_cfa_offset 16
-; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
-; RV64-NEXT: .cfi_offset ra, -8
 ; RV64-NEXT: lui a2, %hi(.LCPI72_0)
 ; RV64-NEXT: addi a2, a2, %lo(.LCPI72_0)
 ; RV64-NEXT: li a3, 32
 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma
 ; RV64-NEXT: vle32.v v16, (a2)
-; RV64-NEXT: mv a2, a0
 ; RV64-NEXT: vmsltu.vx v12, v16, a1
 ; RV64-NEXT: vid.v v16
 ; RV64-NEXT: vmsltu.vx v13, v16, a1
 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
 ; RV64-NEXT: vslideup.vi v13, v12, 4
-; RV64-NEXT: li a0, 64
-; RV64-NEXT: vsetvli zero, a0, e8, m4, ta, ma
+; RV64-NEXT: li a1, 64
+; RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma
 ; RV64-NEXT: vmand.mm v0, v13, v0
 ; RV64-NEXT: vmv.v.i v12, 1
 ; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
@@ -1864,13 +1764,10 @@
 ; RV64-NEXT: vmul.vv v8, v8, v12
 ; RV64-NEXT: vrgather.vi v12, v8, 1
 ; RV64-NEXT: vmul.vv v8, v8, v12
-; RV64-NEXT: vmv.x.s a0, v8
-; RV64-NEXT: mv a1, a2
-; RV64-NEXT: call __muldi3@plt
+; RV64-NEXT: vmv.x.s a1, v8
+; RV64-NEXT: mul a0, a1, a0
 ; RV64-NEXT: slli a0, a0, 56
 ; RV64-NEXT: srai a0, a0, 56
-; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
-; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %r = call i8 @llvm.vp.reduce.mul.v64i8(i8 %s, <64 x i8> %v, <64 x i1> %m, i32 %evl)
 ret i8 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 declare <2 x half> @llvm.vp.rint.v2f16(<2 x half>, <2 x i1>, i32)
@@ -459,6 +459,52 @@
 define <15 x double> @vp_rint_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -472,6 +518,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <15 x double> @llvm.vp.rint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
 ret <15 x double> %v
@@ -501,6 +591,52 @@
 define <16 x double> @vp_rint_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -514,6 +650,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <16 x double> @llvm.vp.rint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
 ret <16 x double> %v
@@ -543,6 +723,71 @@
 define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_v32f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 14
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 13
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 12
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 11
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v2, v0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: li a2, 16
@@ -552,12 +797,6 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 16
 ; CHECK-NEXT: .LBB26_2:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT: lui a2, %hi(.LCPI26_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI26_0)(a2)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
@@ -590,7 +829,66 @@
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -607,6 +905,52 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 16
 ; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
@@ -627,6 +971,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32)
@@ -503,6 +503,52 @@
 define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -518,6 +564,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <15 x double> @llvm.vp.round.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
 ret <15 x double> %v
@@ -549,6 +639,52 @@
 define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -564,6 +700,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <16 x double> @llvm.vp.round.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
 ret <16 x double> %v
@@ -598,9 +778,63 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a2, 26
+; CHECK-NEXT: mul a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 23
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 22
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 21
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 20
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 19
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v25, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -657,7 +891,61 @@
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 23
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -674,6 +962,52 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: li a1, 16
 ; CHECK-NEXT: .LBB27_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: lui a2, %hi(.LCPI27_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
@@ -698,6 +1032,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32)
@@ -503,6 +503,52 @@
 define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1)
@@ -518,6 +564,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <15 x double> @llvm.vp.roundeven.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl)
 ret <15 x double> %v
@@ -549,6 +639,52 @@
 define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1)
@@ -564,6 +700,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <16 x double> @llvm.vp.roundeven.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl)
 ret <16 x double> %v
@@ -598,9 +778,63 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a2, 26
+; CHECK-NEXT: mul a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 +
16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 23 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 22 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 21 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 20 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -657,7 +891,61 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -674,6 +962,52 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -698,6 +1032,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32) @@ -503,6 +503,52 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v15f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded 
Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a1) @@ -518,6 +564,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.roundtozero.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -549,6 +639,52 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 
1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a1) @@ -564,6 +700,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.roundtozero.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -598,9 +778,63 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 26 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 23 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 22 +; CHECK-NEXT: mul a1, a1, a2 +; 
CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 21 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 20 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -657,7 +891,61 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -674,6 
+962,52 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB27_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: lui a2, %hi(.LCPI27_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI27_0)(a2) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma @@ -698,6 +1032,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-segN-load.ll @@ -1,12 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple riscv64 -mattr=+zve64x -riscv-v-vector-bits-min=128 < %s \ +; RUN: llc -mtriple riscv64 -mattr=+m,+zve64x -riscv-v-vector-bits-min=128 < %s \ ; RUN: | FileCheck %s define <8 x i8> @load_factor2(ptr %ptr) { ; CHECK-LABEL: load_factor2: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8> } @llvm.riscv.seg2.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8> } %1, 0 @@ -17,8 +31,30 @@ define <8 x i8> @load_factor3(ptr %ptr) { ; CHECK-LABEL: load_factor3: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg3.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 @@ -30,8 +66,40 @@ define <8 x i8> @load_factor4(ptr %ptr) { ; CHECK-LABEL: load_factor4: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg4e8.v v5, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg4.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 @@ -44,8 +112,52 @@ define <8 x i8> @load_factor5(ptr %ptr) { ; CHECK-LABEL: load_factor5: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg5e8.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg5.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 @@ -59,8 +171,64 @@ define <8 x i8> @load_factor6(ptr %ptr) { ; CHECK-LABEL: load_factor6: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x06, 0x92, 0xa2, 
0x38, 0x00, 0x1e, 0x22 # sp + 16 + 6 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg6e8.v v3, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg6.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 @@ -75,8 +243,76 @@ define <8 x i8> @load_factor7(ptr %ptr) { ; CHECK-LABEL: load_factor7: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 6 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # 
Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg7e8.v v2, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg7.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 @@ -92,8 +328,86 @@ define <8 x i8> @load_factor8(ptr %ptr) { ; CHECK-LABEL: load_factor8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vlseg8e8.v v1, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } @llvm.riscv.seg8.load.v8i8.p0.i64(ptr %ptr, i64 8) %2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } %1, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-fp.ll @@ -316,8 +316,8 @@ ; CHECK-LABEL: select_v16f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmv.v.x v0, a0 +; CHECK-NEXT: vmsne.vi v0, v0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret @@ -330,8 +330,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: feq.d a0, fa0, fa1 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmv.v.x v0, a0 +; CHECK-NEXT: vmsne.vi v0, v0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-int.ll @@ -629,8 +629,8 @@ ; CHECK-LABEL: select_v16i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmv.v.x v0, a0 +; CHECK-NEXT: vmsne.vi v0, v0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret @@ -646,8 +646,8 @@ ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: snez a0, a0 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV32-NEXT: vmv.v.x v24, a0 -; RV32-NEXT: vmsne.vi v0, v24, 0 +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: vmsne.vi v0, v0, 0 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: ret @@ -657,8 +657,8 @@ ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vmsne.vi v0, v24, 0 +; RV64-NEXT: vmv.v.x v0, a0 +; RV64-NEXT: vmsne.vi v0, v0, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmerge.vvm v8, v16, 
v8, v0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -546,9 +546,63 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -588,7 +642,61 @@ ; CHECK-NEXT: vslideup.vi v16, v1, 8 ; CHECK-NEXT: vmv.v.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi 
a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1157,9 +1265,63 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v 
v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -1199,7 +1361,61 @@ ; CHECK-NEXT: vslideup.vi v16, v1, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -631,9 +631,69 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a4, 26 +; CHECK-NEXT: mul a1, a1, a4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 25 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 23 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; 
CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 22 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 21 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 20 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 19 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 18 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a4, a1, 4 +; CHECK-NEXT: add a1, a4, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -671,7 +731,67 @@ ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v1 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: 
slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -682,6 +802,18 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v256i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma @@ -701,6 +833,16 @@ ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -711,6 +853,18 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v256i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma @@ -730,6 +884,16 @@ ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1320,9 +1484,63 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -1362,7 +1580,61 @@ ; CHECK-NEXT: vslideup.vi v16, v1, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, 
a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1373,6 +1645,18 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v64i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 @@ -1393,6 +1677,16 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v16, v25, 4 ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <64 x i32> poison, i32 %b, i32 0 %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer @@ -1403,6 +1697,18 @@ define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v64i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 @@ -1423,6 +1729,16 @@ ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v16, v25, 4 ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <64 x i32> poison, i32 %b, 
i32 0 %vb = shufflevector <64 x i32> %elt.head, <64 x i32> poison, <64 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -153,6 +153,58 @@ define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -173,6 +225,56 @@ ; CHECK-NEXT: vmv1r.v v0, v1 ; CHECK-NEXT: vsext.vf2 v24, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl) ret <32 x i64> %v @@ -181,6 +283,52 @@ define <32 x i64> @vsext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a2, a0, a1 ; CHECK-NEXT: addi a2, a2, -1 @@ -197,6 +345,50 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsext.vf2 v24, v8 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl) ret <32 x i64> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -870,6 +870,52 @@ define <12 x i64> @reverse_v12i64(<12 x i64> %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_v12i64: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 6 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # 
Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-UNKNOWN-NEXT: lui a0, %hi(.LCPI45_0) ; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, %lo(.LCPI45_0) ; RV32-BITS-UNKNOWN-NEXT: li a1, 32 @@ -877,10 +923,100 @@ ; RV32-BITS-UNKNOWN-NEXT: vle32.v v24, (a0) ; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v16, v8, v24 ; RV32-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 6 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: add sp, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_v12i64: ; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -16 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: sub sp, sp, a0 +; RV32-BITS-256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; 
RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 6 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: addi a0, sp, 16 +; RV32-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-256-NEXT: lui a0, %hi(.LCPI45_0) ; RV32-BITS-256-NEXT: addi a0, a0, %lo(.LCPI45_0) ; RV32-BITS-256-NEXT: li a1, 32 @@ -888,10 +1024,100 @@ ; RV32-BITS-256-NEXT: vle32.v v24, (a0) ; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV32-BITS-256-NEXT: vmv.v.v v8, v16 +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 6 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v30, (a0) 
# Unknown-size Folded Reload +; RV32-BITS-256-NEXT: addi a0, sp, 16 +; RV32-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: add sp, sp, a0 +; RV32-BITS-256-NEXT: addi sp, sp, 16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_v12i64: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -16 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 3 +; RV32-BITS-512-NEXT: sub sp, sp, a0 +; RV32-BITS-512-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: sub a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 6 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: addi a0, sp, 16 +; RV32-BITS-512-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-512-NEXT: lui a0, %hi(.LCPI45_0) ; RV32-BITS-512-NEXT: addi a0, a0, %lo(.LCPI45_0) ; RV32-BITS-512-NEXT: li a1, 32 @@ -899,33 +1125,347 @@ ; RV32-BITS-512-NEXT: vle32.v v24, (a0) ; RV32-BITS-512-NEXT: vrgather.vv v16, v8, v24 ; RV32-BITS-512-NEXT: vmv.v.v v8, v16 +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: sub a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 6 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; 
RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: addi a0, sp, 16 +; RV32-BITS-512-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 3 +; RV32-BITS-512-NEXT: add sp, sp, a0 +; RV32-BITS-512-NEXT: addi sp, sp, 16 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_v12i64: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 6 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; 
RV64-BITS-UNKNOWN-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: vrsub.vi v24, v16, 11 ; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v16, v8, v24 ; RV64-BITS-UNKNOWN-NEXT: vmv.v.v v8, v16 +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 6 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: add sp, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_v12i64: ; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -16 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: sub sp, sp, a0 +; RV64-BITS-256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 6 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; 
RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: addi a0, sp, 16 +; RV64-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-BITS-256-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BITS-256-NEXT: vid.v v16 ; RV64-BITS-256-NEXT: vrsub.vi v24, v16, 11 ; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV64-BITS-256-NEXT: vmv.v.v v8, v16 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 6 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: addi a0, sp, 16 +; RV64-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: add sp, sp, a0 +; RV64-BITS-256-NEXT: addi sp, sp, 16 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_v12i64: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -16 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 3 +; RV64-BITS-512-NEXT: sub sp, sp, a0 +; RV64-BITS-512-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 
16 + 8 * vlenb +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 3 +; RV64-BITS-512-NEXT: sub a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 6 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 2 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: addi a0, sp, 16 +; RV64-BITS-512-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-BITS-512-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: vrsub.vi v24, v16, 11 ; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v24 ; RV64-BITS-512-NEXT: vmv.v.v v8, v16 +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 3 +; RV64-BITS-512-NEXT: sub a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 6 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 2 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: add a0, sp, a0 +; 
RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: addi a0, sp, 16 +; RV64-BITS-512-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 3 +; RV64-BITS-512-NEXT: add sp, sp, a0 +; RV64-BITS-512-NEXT: addi sp, sp, 16 ; RV64-BITS-512-NEXT: ret %res = call <12 x i64> @llvm.experimental.vector.reverse.v12i64(<12 x i64> %a) ret <12 x i64> %res diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -308,6 +308,14 @@ define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -325,6 +333,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -500,6 +500,52 @@ ; CHECK-RV32-NEXT: # %bb.5: ; CHECK-RV32-NEXT: li a3, 16 ; CHECK-RV32-NEXT: .LBB35_6: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a4, a4, 3 +; CHECK-RV32-NEXT: sub sp, sp, a4 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a6, a4, 3 +; CHECK-RV32-NEXT: sub a4, a6, a4 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: li a6, 6 +; CHECK-RV32-NEXT: mul a4, a4, a6 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v25, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a6, a4, 2 +; CHECK-RV32-NEXT: add a4, a6, a4 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v26, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a4, a4, 2 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v27, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, 
vlenb +; CHECK-RV32-NEXT: slli a6, a4, 1 +; CHECK-RV32-NEXT: add a4, a6, a4 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v28, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a4, a4, 1 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v29, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v30, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: addi a4, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v31, (a4) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: mul a4, a3, a2 ; CHECK-RV32-NEXT: add a4, a1, a4 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -517,6 +563,50 @@ ; CHECK-RV32-NEXT: addi a0, a0, 128 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV32-NEXT: vse64.v v24, (a0) +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: addi sp, sp, 16 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_v33f64: @@ -551,6 +641,52 @@ ; CHECK-RV64-NEXT: # %bb.5: ; CHECK-RV64-NEXT: li a4, 16 ; CHECK-RV64-NEXT: .LBB35_6: +; CHECK-RV64-NEXT: addi sp, sp, -16 +; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: slli a3, a3, 3 +; CHECK-RV64-NEXT: sub sp, sp, a3 +; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: slli a6, a3, 3 +; CHECK-RV64-NEXT: sub a3, a6, a3 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: li a6, 
6 +; CHECK-RV64-NEXT: mul a3, a3, a6 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: slli a6, a3, 2 +; CHECK-RV64-NEXT: add a3, a6, a3 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: slli a3, a3, 2 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: slli a6, a3, 1 +; CHECK-RV64-NEXT: add a3, a6, a3 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: slli a3, a3, 1 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: addi a3, sp, 16 +; CHECK-RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: mul a3, a4, a2 ; CHECK-RV64-NEXT: add a3, a1, a3 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma @@ -568,6 +704,50 @@ ; CHECK-RV64-NEXT: addi a0, a0, 128 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV64-NEXT: vse64.v v24, (a0) +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 3 +; CHECK-RV64-NEXT: sub a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 6 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 2 +; CHECK-RV64-NEXT: add a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 2 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 1 +; CHECK-RV64-NEXT: add a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: add sp, sp, a0 +; CHECK-RV64-NEXT: addi sp, sp, 16 ; CHECK-RV64-NEXT: ret %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl) ret <33 x 
double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -53,6 +53,52 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_v128i7_v128i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v28, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 8 @@ -74,6 +120,50 @@ ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vslideup.vx v16, v24, a1 ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 
16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <128 x i7> @llvm.vp.trunc.v128i7.v128i16(<128 x i16> %a, <128 x i1> %m, i32 %vl) ret <128 x i7> %v @@ -227,10 +317,81 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 56 +; CHECK-NEXT: li a3, 68 ; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 68 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 67 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 66 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 6 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 6 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 6 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 62 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 61 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 60 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 59 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 58 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 57 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 56 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; 
CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: slli a2, a2, 4 @@ -477,8 +638,79 @@ ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 67 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 66 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 6 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 6 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 6 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 62 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 61 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 60 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 59 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 58 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 57 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 56 ; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 68 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -491,6 +723,52 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vtrunc_v32i32_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; 
CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v28, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -512,6 +790,50 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vslideup.vi v16, v24, 16 ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x i32> @llvm.vp.trunc.v32i32.v32i64(<32 x i64> %a, <32 x i1> %m, i32 %vl) ret <32 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -308,6 +308,14 @@ define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -325,6 +333,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.add.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) @@ -413,6 +413,14 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vi_v258i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma @@ -430,6 +438,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -468,6 +482,14 @@ define <256 x i8> @vadd_vi_v258i8_evl129(<256 x i8> %va, <256 x i1> %m) { ; CHECK-LABEL: vadd_vi_v258i8_evl129: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; 
CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) @@ -475,6 +497,12 @@ ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <256 x i8> poison, i8 -1, i32 0 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer @@ -1528,6 +1556,58 @@ define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vadd_vx_v32i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 10 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 @@ -1548,10 +1628,68 @@ ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 
+; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 @@ -1569,6 +1707,12 @@ ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v8, v8, -1, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer @@ -1579,6 +1723,52 @@ define <32 x i64> @vadd_vi_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vadd_vi_v32i64_unmasked: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: 
vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; RV32-NEXT: li a2, 16 @@ -1596,6 +1786,50 @@ ; RV32-NEXT: and a0, a0, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vi_v32i64_unmasked: @@ -1649,6 +1883,58 @@ define <32 x i64> @vadd_vx_v32i64_evl27(<32 x i64> %va, <32 x i1> %m) { ; RV32-LABEL: vadd_vx_v32i64_evl27: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v1, v0 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 @@ -1660,10 +1946,68 @@ ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: vadd.vv v8, v8, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vadd_vx_v32i64_evl27: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma @@ -1671,6 +2015,11 @@ ; RV64-NEXT: vsetivli zero, 11, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb 
+; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vand-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 declare <8 x i7> @llvm.vp.and.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32) @@ -1293,6 +1293,52 @@ define <11 x i64> @vand_vx_v11i64(<11 x i64> %va, i64 %b, <11 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vand_vx_v11i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v16, v0 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -1306,6 +1352,50 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v16 ; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, 
a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vand_vx_v11i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -324,9 +324,57 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: 
add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -363,7 +411,55 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -374,6 +470,93 @@ define <32 x double> @vfsgnj_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) { ; CHECK-LABEL: vfsgnj_vv_v32f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi 
a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: sub a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 2 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a1) @@ -392,6 +575,91 @@ ; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v16, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <32 x i1> poison, i1 true, i32 0 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -321,6 +321,14 @@ define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -338,6 +346,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfabs.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fabs.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -570,11 +570,101 @@ define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v15f64: 
; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = 
call <15 x double> @llvm.vp.fma.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -583,10 +673,100 @@ define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v15f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement <15 x i1> poison, i1 true, i32 0 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer @@ -599,11 +779,101 @@ define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.fma.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -612,10 +882,100 @@ define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_v16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; 
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <16 x i1> poison, i1 true, i32 0
 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -657,10 +1017,64 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: li a3, 50
 ; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 47
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 46
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 45
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 44
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 43
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 42
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 41
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 4
@@ -742,8 +1156,62 @@
 ; CHECK-NEXT: addi a0, a0, 16
 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 47
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 46
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 45
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 44
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 43
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 42
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 41
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: li a1, 40
 ; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 50
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -757,10 +1225,99 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: li a3, 40
 ; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 38
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 37
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 36
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 35
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 34
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 5
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 5
+; CHECK-NEXT: sub a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 30
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 29
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 28
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 27
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 26
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 25
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a1, a2, 128
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a1)
@@ -809,8 +1366,97 @@
 ; CHECK-NEXT: vmv8r.v v8, v0
 ; CHECK-NEXT: vmv.v.v v16, v24
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 38
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 37
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 36
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 35
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 34
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 30
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 29
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 28
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 27
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: li a1, 24
 ; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll
@@ -324,9 +324,57 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 23
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 4
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -363,7 +411,55 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 23
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -374,6 +470,93 @@
 define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmax_vv_v32f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: sub a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 2
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a1, a0, 128
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a1)
@@ -392,6 +575,91 @@
 ; CHECK-NEXT: and a0, a1, a0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmax.vv v16, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll
@@ -324,9 +324,57 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 23
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 4
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 3
@@ -363,7 +411,55 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 23
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -374,6 +470,93 @@
 define <32 x double> @vfmin_vv_v32f64_unmasked(<32 x double> %va, <32 x double> %vb, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmin_vv_v32f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: sub a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 2
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 1
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a1, a0, 128
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a1)
@@ -392,6 +575,91 @@
 ; CHECK-NEXT: and a0, a1, a0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vfmin.vv v16, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <32 x i1> poison, i1 true, i32 0
 %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll
@@ -570,11 +570,101 @@
 define <15 x double> @vfma_vv_v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfma_vv_v15f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <15 x double> @llvm.vp.fmuladd.v15f64(<15 x double> %va, <15 x double> %b, <15 x double> %c, <15 x i1> %m, i32 %evl)
 ret <15 x double> %v
@@ -583,10 +673,100 @@
 define <15 x double> @vfma_vv_v15f64_unmasked(<15 x double> %va, <15 x double> %b, <15 x double> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfma_vv_v15f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <15 x i1> poison, i1 true, i32 0
 %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -599,11 +779,101 @@
 define <16 x double> @vfma_vv_v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfma_vv_v16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <16 x double> @llvm.vp.fmuladd.v16f64(<16 x double> %va, <16 x double> %b, <16 x double> %c, <16 x i1> %m, i32 %evl)
 ret <16 x double> %v
@@ -612,10 +882,100 @@
 define <16 x double> @vfma_vv_v16f64_unmasked(<16 x double> %va, <16 x double> %b, <16 x double> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfma_vv_v16f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: vfmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <16 x i1> poison, i1 true, i32 0
 %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -657,10 +1017,64 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: li a3, 50
 ; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 47
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 46
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 45
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 44
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 43
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 42
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 41
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: slli a1, a1, 4
@@ -742,8 +1156,62 @@
 ; CHECK-NEXT: addi a0, a0, 16
 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 47
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 46
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 45
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 44
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 43
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 42
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 41
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: li a1, 40
 ; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 50
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -757,10 +1225,99 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: li a3, 40
 ; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 38
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 37
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 36
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 35
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 34
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 5
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 5
+; CHECK-NEXT: sub a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 30
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 29
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 28
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 27
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 26
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 25
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a1, a2, 128
 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; CHECK-NEXT: vle64.v v24, (a1)
@@ -809,8 +1366,97 @@
 ; CHECK-NEXT: vmv8r.v v8, v0
 ; CHECK-NEXT: vmv.v.v v16, v24
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 38
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 37
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 36
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 35
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 34
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
slli a1, a0, 5 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -321,6 +321,14 @@ define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -338,6 +346,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfneg.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.fneg.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -321,6 +321,14 @@ define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64: ; 
CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 @@ -338,6 +346,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.sqrt.v32f64(<32 x double> %va, <32 x i1> %m, i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwadd.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+f,+d -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define <2 x float> @vfwadd_v2f16(ptr %x, ptr %y) { @@ -91,9 +91,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 
+; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -110,7 +199,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -192,9 +370,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -209,7 +476,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwadd.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -369,6 +725,93 @@ define <32 x double> @vfwadd_vf_v32f32(ptr %x, float %y) { ; CHECK-LABEL: vfwadd_vf_v32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill 
+; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v24, (a0) @@ -379,6 +822,91 @@ ; CHECK-NEXT: vfwcvt.f.f.v v16, v8 ; CHECK-NEXT: vfwadd.wv v8, v16, v24 ; CHECK-NEXT: vfwadd.wv v16, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = insertelement <32 x float> poison, float %y, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwmul.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+f,+d -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define <2 x float> @vfwmul_v2f16(ptr %x, ptr %y) { @@ -91,9 +91,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -110,7 +199,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -192,9 +370,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -209,7 +476,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwmul.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -369,6 +725,93 @@ define <32 x double> @vfwmul_vf_v32f32(ptr %x, float %y) { ; CHECK-LABEL: vfwmul_vf_v32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 
16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) @@ -382,6 +825,91 @@ ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmul.vv v8, v16, v0 ; CHECK-NEXT: vfmul.vv v16, v24, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = load <32 x float>, ptr %x %b = insertelement <32 x float> poison, float %y, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfwsub.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh,+f,+d -target-abi=ilp32d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh,+f,+d -target-abi=lp64d -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh,+f,+d -target-abi=lp64d -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s define <2 x float> @vfwsub_v2f16(ptr %x, ptr %y) { @@ -91,9 +91,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li 
a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -110,7 +199,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vfwsub.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -192,9 +370,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v16, (a0)
@@ -209,7 +476,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vfwsub.vv v16, v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -369,6 +725,93 @@
 define <32 x double> @vfwsub_vf_v32f32(ptr %x, float %y) {
 ; CHECK-LABEL: vfwsub_vf_v32f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 14
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 13
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 12
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 11
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a1, 32
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v8, (a0)
@@ -382,6 +825,91 @@
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vfsub.vv v8, v16, v0
 ; CHECK-NEXT: vfsub.vv v16, v24, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %a = load <32 x float>, ptr %x
 %b = insertelement <32 x float> poison, float %y, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.smax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
@@ -288,6 +288,14 @@
 define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmax_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
@@ -305,6 +313,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -343,6 +357,14 @@
 define <256 x i8> @vmax_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) {
 ; CHECK-LABEL: vmax_vx_v258i8_evl129:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v24, (a1)
@@ -350,6 +372,12 @@
 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1091,6 +1119,58 @@
 define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vmax_vx_v32i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
@@ -1111,10 +1191,68 @@
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmv1r.v v0, v1
 ; RV32-NEXT: vmax.vv v8, v8, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vmax_vx_v32i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v24, v0
 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT: vslidedown.vi v0, v0, 2
@@ -1133,6 +1271,12 @@
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vmax.vx v8, v8, a1, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.umax.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
@@ -287,6 +287,14 @@
 define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmaxu_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
@@ -304,6 +312,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -342,6 +356,14 @@
 define <256 x i8> @vmaxu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) {
 ; CHECK-LABEL: vmaxu_vx_v258i8_evl129:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v24, (a1)
@@ -349,6 +371,12 @@
 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1090,6 +1118,58 @@
 define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vmaxu_vx_v32i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
@@ -1110,10 +1190,68 @@
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmv1r.v v0, v1
 ; RV32-NEXT: vmaxu.vv v8, v8, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vmaxu_vx_v32i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v24, v0
 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT: vslidedown.vi v0, v0, 2
@@ -1132,6 +1270,12 @@
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vmaxu.vx v8, v8, a1, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.smin.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
@@ -288,6 +288,14 @@
 define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmin_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
@@ -305,6 +313,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -343,6 +357,14 @@
 define <256 x i8> @vmin_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) {
 ; CHECK-LABEL: vmin_vx_v258i8_evl129:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v24, (a1)
@@ -350,6 +372,12 @@
 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1091,6 +1119,58 @@
 define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vmin_vx_v32i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
@@ -1111,10 +1191,68 @@
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmv1r.v v0, v1
 ; RV32-NEXT: vmin.vv v8, v8, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vmin_vx_v32i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v24, v0
 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT: vslidedown.vi v0, v0, 2
@@ -1133,6 +1271,12 @@
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vmin.vx v8, v8, a1, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s \
 ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
 
 declare <8 x i7> @llvm.vp.umin.v8i7(<8 x i7>, <8 x i7>, <8 x i1>, i32)
@@ -287,6 +287,14 @@
 define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vminu_vx_v258i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: li a3, 128
 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma
@@ -304,6 +312,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -342,6 +356,14 @@
 define <256 x i8> @vminu_vx_v258i8_evl129(<256 x i8> %va, i8 %b, <256 x i1> %m) {
 ; CHECK-LABEL: vminu_vx_v258i8_evl129:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v24, (a1)
@@ -349,6 +371,12 @@
 ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <256 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <256 x i8> %elt.head, <256 x i8> poison, <256 x i32> zeroinitializer
@@ -1090,6 +1118,58 @@
 define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vminu_vx_v32i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
@@ -1110,10 +1190,68 @@
 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT: vmv1r.v v0, v1
 ; RV32-NEXT: vminu.vv v8, v8, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vminu_vx_v32i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v24, v0
 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT: vslidedown.vi v0, v0, 2
@@ -1132,6 +1270,12 @@
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vminu.vx v8, v8, a1, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %elt.head = insertelement <32 x i64> poison, i64 -1, i32 0
 %vb = shufflevector <32 x i64> %elt.head, <32 x i64> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64
 
 declare <2 x i8> @llvm.vp.gather.v2i8.v2p0(<2 x ptr>, <2 x i1>, i32)
@@ -1890,6 +1890,58 @@
 define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpgather_v32f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 10
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vmv1r.v v1, v0
 ; RV32-NEXT: addi a1, a0, -16
 ; RV32-NEXT: sltu a2, a0, a1
@@ -1910,10 +1962,68 @@
 ; RV32-NEXT: vmv1r.v v0, v1
 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t
 ; RV32-NEXT: vmv.v.v v8, v24
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpgather_v32f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v24, v0
 ; RV64-NEXT: addi a1, a0, -16
 ; RV64-NEXT: sltu a2, a0, a1
@@ -1931,6 +2041,12 @@
 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
 ret <32 x double> %v
@@ -1949,6 +2065,52 @@
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a2, 16
 ; RV32-NEXT: .LBB87_2:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -1961,10 +2123,100 @@
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpgather_baseidx_v32i8_v32f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v10, v0
 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf8 v16, v8
@@ -1990,6 +2242,50 @@
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v10
 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, <32 x i8> %idxs
 %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl)
@@ -2009,6 +2305,52 @@
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a2, 16
 ; RV32-NEXT: .LBB88_2:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -2021,10 +2363,100 @@
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vmv1r.v v10, v0
 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma
 ; RV64-NEXT: vslidedown.vi v12, v8, 16
@@ -2049,6 +2481,50 @@
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v10
 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <32 x i8> %idxs to <32 x i64>
 %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
@@ -2069,6 +2545,52 @@
 ; RV32-NEXT: # %bb.1:
 ; RV32-NEXT: li a2, 16
 ; RV32-NEXT: .LBB89_2:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -2081,10 +2603,100 @@
 ; RV32-NEXT: vslidedown.vi v0, v0, 2
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpgather_baseidx_zext_v32i8_v32f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2,
vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v10, v0 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 16 @@ -2109,6 +2721,50 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v10 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext <32 x i8> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2129,6 +2785,52 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB90_2: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, 
a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -2141,10 +2843,100 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_v32i16_v32f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; 
RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 @@ -2170,6 +2962,50 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i16> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) @@ -2189,6 +3025,52 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB91_2: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr 
a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -2201,10 +3083,141 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v2, (a2) # 
Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 @@ -2229,6 +3242,91 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2249,6 +3347,52 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB92_2: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb 
+; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -2261,10 +3405,141 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_v32i16_v32f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 4 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v5, (a2) # 
Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v16, v8, 16 @@ -2289,6 +3564,91 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add 
a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext <32 x i16> %idxs to <32 x i64> %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs @@ -2308,6 +3668,52 @@ ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB93_2: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma @@ -2320,6 +3726,50 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_v32i32_v32f64:
@@ -2327,9 +3777,15 @@
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: csrr a2, vlenb
-; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: li a3, 10
+; RV64-NEXT: mul a2, a2, a3
; RV64-NEXT: sub sp, sp, a2
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV64-NEXT: vsext.vf2 v16, v8
@@ -2361,6 +3817,12 @@
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
; RV64-NEXT: csrr a0, vlenb
; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
; RV64-NEXT: add sp, sp, a0
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
@@ -2381,6 +3843,52 @@
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a2, 16
; RV32-NEXT: .LBB94_2:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -2393,10 +3901,100 @@
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_sext_v32i32_v32f64:
; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
@@ -2421,6 +4019,50 @@
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = sext <32 x i32> %idxs to <32 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
@@ -2440,6 +4082,52 @@
; RV32-NEXT: # %bb.1:
; RV32-NEXT: li a2, 16
; RV32-NEXT: .LBB95_2:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t
; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma
@@ -2452,10 +4140,100 @@
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_zext_v32i32_v32f64:
; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vmv1r.v v24, v0
; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; RV64-NEXT: vslidedown.vi v16, v8, 16
@@ -2480,6 +4258,50 @@
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV64-NEXT: vmv1r.v v0, v24
; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = zext <32 x i32> %idxs to <32 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %eidxs
@@ -2490,6 +4312,52 @@
define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vpgather_baseidx_v32f64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: sub a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 6
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vnsrl.wi v16, v8, 0
@@ -2515,10 +4383,62 @@
; RV32-NEXT: vslidedown.vi v0, v0, 2
; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vpgather_baseidx_v32f64:
; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsll.vi v8, v8, 3 @@ -2539,6 +4459,12 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (a0), v8, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, <32 x i64> %idxs %v = call <32 x double> @llvm.vp.gather.v32f64.v32p0(<32 x ptr> %ptrs, <32 x i1> %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -riscv-v-vector-bits-min=128 \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare <2 x i8> @llvm.vp.load.v2i8.p0(ptr, <2 x i1>, i32) @@ -436,6 +436,52 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 3 +; CHECK-NEXT: sub sp, sp, a5 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a6, a5, 3 +; CHECK-NEXT: sub a5, a6, a5 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v24, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 6 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v25, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a6, a5, 2 +; CHECK-NEXT: add a5, a6, a5 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v26, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 2 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v27, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a6, a5, 1 +; CHECK-NEXT: add a5, a6, a5 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v28, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 1 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v29, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v30, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: addi a5, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: 
vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 ; CHECK-NEXT: addi a5, a1, 256 @@ -456,6 +502,50 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %load = call <33 x double> @llvm.vp.load.v33f64.p0(ptr %ptr, <33 x i1> %m, i32 %evl) ret <33 x double> %load diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1062,9 +1062,63 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: 
li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -1101,7 +1155,61 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1112,6 +1220,14 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vf_v32f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a2, a0, a1 @@ -1129,6 +1245,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement <32 x double> poison, double %a, i32 0 %va = shufflevector <32 x double> %elt.head, <32 x double> poison, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1690,6 +1690,52 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a0) @@ -1711,6 +1757,50 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi 
a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_v32f64: @@ -1718,9 +1808,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 4 +; RV64-NEXT: sub a1, a3, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 3 +; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v24, (a1) @@ -1746,7 +1883,54 @@ ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; 
RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1757,6 +1941,52 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; 
RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) @@ -1779,6 +2009,50 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_v32i32_v32f64: @@ -1786,10 +2060,99 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 +; RV64-NEXT: li a4, 24 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 23 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v1, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 22 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v2, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 21 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v3, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 20 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 19 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 18 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi 
a3, a3, 16 +; RV64-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: sub a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 14 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 13 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 12 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 11 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -1829,8 +2192,97 @@ ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 23 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 22 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 21 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 20 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 19 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 ; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 24 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1842,6 +2294,52 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; 
RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) @@ -1864,6 +2362,50 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_v32i32_v32f64: @@ -1871,10 +2413,99 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 +; RV64-NEXT: li a4, 24 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 23 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v1, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 22 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v2, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 21 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v3, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 20 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 19 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 18 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: 
vs1r.v v6, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: sub a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 14 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 13 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 12 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 11 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -1915,8 +2546,97 @@ ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 23 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 22 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 21 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 20 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 19 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; 
RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 ; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 24 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1929,6 +2649,52 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, <32 x i32> %idxs, <32 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # 
Unknown-size Folded Spill ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) @@ -1951,6 +2717,50 @@ ; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_zext_v32i32_v32f64: @@ -1958,10 +2768,99 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: li a4, 10 +; RV64-NEXT: li a4, 24 ; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 23 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v1, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 22 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v2, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 21 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v3, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 20 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 19 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 18 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v6, (a3) # Unknown-size 
Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: sub a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 14 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 13 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 12 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 11 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 10 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 3 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV64-NEXT: vle32.v v24, (a1) @@ -2002,8 +2901,97 @@ ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 23 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 22 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 21 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 20 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 19 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; 
RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: li a1, 10 ; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 24 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll @@ -8,10 +8,100 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: li a4, 56 ; CHECK-NEXT: mul a2, a2, a4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 54 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 53 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 52 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 51 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 50 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 49 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, 
a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 48 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 47 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 46 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 45 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 44 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 43 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 42 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 41 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a4, 40 +; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: li a4, 24 ; CHECK-NEXT: mul a2, a2, a4 @@ -92,8 +182,98 @@ ; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 54 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 53 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 51 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 49 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb 
+; CHECK-NEXT: li a1, 47
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 46
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 45
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 44
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 43
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 42
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 41
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 56
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -104,6 +284,93 @@
define <512 x i8> @vadd_v512i8_zvl256(<512 x i8> %a, <512 x i8> %b) #1 {
; CHECK-LABEL: vadd_v512i8_zvl256:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 14
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 13
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 12
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 11
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: li a2, 256
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
@@ -111,6 +378,91 @@
; CHECK-NEXT: vle8.v v0, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v24
; CHECK-NEXT: vadd.vv v16, v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%c = add <512 x i8> %a, %b
ret <512 x i8> %c
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll
@@ -154,9 +154,68 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a4, 18
+; CHECK-NEXT: mul a2, a2, a4
; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 4
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 4
+; CHECK-NEXT: sub a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 14
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 13
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 12
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 11
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a4, 10
+; CHECK-NEXT: mul a2, a2, a4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a4, a2, 3
+; CHECK-NEXT: add a2, a4, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: addi a2, sp, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v2, v8
@@ -185,7 +244,66 @@
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -199,10 +317,57 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 5
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 30
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 29
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 28
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 27
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 26
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 25
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a2, a2, a3
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v24, (a0)
@@ -242,8 +407,55 @@
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
; CHECK-NEXT: vmv8r.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 30
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 29
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 28
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 27
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -403,9 +615,57 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 23
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 4
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
@@ -442,7 +702,55 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 23
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -456,9 +764,57 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 23
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 22
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 21
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 20
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 19
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
@@ -486,7 +842,55 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 23
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -598,9 +1002,56 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 4
+; CHECK-NEXT: sub a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: addi a1, a0, 128
; CHECK-NEXT: li a3, 32
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
@@ -626,7 +1077,54 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
define <2 x i16> @vwadd_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwadd_v2i16:
@@ -250,8 +250,97 @@
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v16, (a0)
@@ -268,7 +357,96 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwadd.vv v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -285,8 +463,97 @@
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
@@ -303,7 +570,96 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwadd.vv v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -320,8 +676,97 @@
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v16, (a0)
@@ -336,7 +781,96 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwadd.vv v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
define <2 x i16> @vwaddu_v2i16(ptr %x, ptr %y) {
; CHECK-LABEL: vwaddu_v2i16:
@@ -250,8 +250,97 @@
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 128
; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
; CHECK-NEXT: vle8.v v16, (a0)
@@ -268,7 +357,96 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwaddu.vv v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -285,8 +463,97 @@
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 64
; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
; CHECK-NEXT: vle16.v v16, (a0)
@@ -303,7 +570,96 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwaddu.vv v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -320,8 +676,97 @@
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: li a2, 32
; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
; CHECK-NEXT: vle32.v v16, (a0)
@@ -336,7 +781,96 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vwaddu.vv v16, v24, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+;
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <2 x i16> @vwmul_v2i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwmul_v2i16: @@ -275,9 +275,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill 
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v16, (a0)
@@ -294,7 +383,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmul.vv v16, v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -312,9 +490,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 64
 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
 ; CHECK-NEXT: vle16.v v16, (a0)
@@ -331,7 +598,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmul.vv v16, v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -349,9 +705,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v16, (a0)
@@ -366,7 +811,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmul.vv v16, v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <2 x i16> @vwmulsu_v2i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulsu_v2i16:
@@ -267,9 +267,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v16, (a0)
@@ -286,7 +375,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmulsu.vv v16, v0, v24
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -304,9 +482,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 32
 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma
 ; CHECK-NEXT: vle32.v v16, (a0)
@@ -358,7 +803,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmulsu.vv v16, v0, v24
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s
 
 define <2 x i16> @vwmulu_v2i16(ptr %x, ptr %y) {
 ; CHECK-LABEL: vwmulu_v2i16:
@@ -251,9 +251,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 128
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vle8.v v16, (a0)
@@ -270,7 +359,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmulu.vv v16, v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -288,9 +466,98 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a2, a2, a3
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 22
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 21
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 20
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 19
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 18
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a2, 64
 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma
 ; CHECK-NEXT: vle16.v v16, (a0)
@@ -307,7 +574,96 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vwmulu.vv v16,
v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -325,9 +681,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -342,7 +787,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwmulu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < 
%s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <2 x i16> @vwsub_v2i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_v2i16: @@ -250,8 +250,97 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, 
a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) @@ -268,7 +357,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -285,8 +463,97 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -303,7 +570,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) 
# Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -320,8 +676,97 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 
22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -336,7 +781,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsub.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: 
addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc -mtriple=riscv32 -mattr=+m,+v 
-riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -riscv-v-vector-bits-min=128 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64 define <2 x i16> @vwsubu_v2i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_v2i16: @@ -250,8 +250,97 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # 
Unknown-size Folded Spill ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vle8.v v16, (a0) @@ -268,7 +357,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -285,8 +463,97 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: 
csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) @@ -303,7 +570,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -320,8 +676,97 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 21 +; 
CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0) @@ -336,7 +781,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vwsubu.vv v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi 
a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll
@@ -153,6 +153,58 @@
 define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vzext_v32i64_v32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT: vslidedown.vi v0, v0, 2
@@ -173,6 +225,56 @@
 ; CHECK-NEXT: vmv1r.v v0, v1
 ; CHECK-NEXT: vzext.vf2 v24, v8, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> %m, i32 %evl)
 ret <32 x i64> %v
@@ -181,6 +283,52 @@
 define <32 x i64> @vzext_v32i64_v32i32_unmasked(<32 x i32> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vzext_v32i64_v32i32_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: addi a1, a0, -16
 ; CHECK-NEXT: sltu a2, a0, a1
 ; CHECK-NEXT: addi a2, a2, -1
@@ -197,6 +345,50 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vzext.vf2 v24, v8
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32> %va, <32 x i1> shufflevector (<32 x i1> insertelement (<32 x i1> undef, i1 true, i32 0), <32 x i1> undef, <32 x i32> zeroinitializer), i32 %evl)
 ret <32 x i64> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 declare <vscale x 1 x half> @llvm.vp.floor.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -233,6 +233,52 @@
 define <vscale x 32 x half> @vp_floor_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
 ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
@@ -248,6 +294,52 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vp.floor.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x half> %v
@@ -459,6 +549,52 @@
 define <vscale x 16 x float> @vp_floor_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -474,6 +610,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 16 x float> @llvm.vp.floor.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x float> %v
@@ -641,6 +821,52 @@
 define <vscale x 7 x double> @vp_floor_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv7f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
@@ -656,6 +882,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 7 x double> @llvm.vp.floor.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
 ret <vscale x 7 x double> %v
@@ -687,6 +957,52 @@
 define <vscale x 8 x double> @vp_floor_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
@@ -702,6 +1018,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x double> @llvm.vp.floor.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x double> %v
@@ -737,9 +1097,68 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 14
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 13
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 12
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 11
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -788,7 +1207,66 @@
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -799,6 +1277,52 @@
 define <vscale x 16 x double> @vp_floor_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_floor_nxv16f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
@@ -828,6 +1352,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
--- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs < %s \
 ; RUN: -target-abi=lp64d -riscv-v-vector-bits-min=0 | FileCheck --check-prefix=CHECK-NOV %s
-; RUN: llc -mtriple=riscv64 -mattr=+f,+d,+v -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+f,+d,+v -verify-machineinstrs < %s \
 ; RUN: -target-abi=lp64d -riscv-v-vector-bits-min=-1 | FileCheck --check-prefix=CHECK-V %s
 ; i32 saturate
@@ -456,9 +456,96 @@
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
 ; CHECK-V-NEXT: csrr a1, vlenb
-; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: li a2, 18
+; CHECK-V-NEXT: mul a1, a1, a2
 ; CHECK-V-NEXT: sub sp, sp, a1
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 18 * vlenb
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 4
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 4
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 14
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 13
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 12
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 11
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 10
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 3
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 6
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 2
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 1
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: lhu s0, 24(a0)
 ; CHECK-V-NEXT: lhu s1, 16(a0)
 ; CHECK-V-NEXT: lhu s2, 0(a0)
@@ -503,7 +590,94 @@
 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 18
+; CHECK-V-NEXT: mul a0, a0, a1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -614,9 +788,96 @@
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
 ; CHECK-V-NEXT: csrr a1, vlenb
-; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: li a2, 18
+; CHECK-V-NEXT: mul a1, a1, a2
 ; CHECK-V-NEXT: sub sp, sp, a1
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 18 * vlenb
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 4
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 4
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 14
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 13
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 12
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 11
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 10
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 3
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 6
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 2
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 1
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: lhu s0, 24(a0)
 ; CHECK-V-NEXT: lhu s1, 16(a0)
 ; CHECK-V-NEXT: lhu s2, 0(a0)
@@ -660,7 +921,94 @@
 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 18
+; CHECK-V-NEXT: mul a0, a0, a1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -781,9 +1129,96 @@
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
 ; CHECK-V-NEXT: csrr a1, vlenb
-; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: li a2, 18
+; CHECK-V-NEXT: mul a1, a1, a2
 ; CHECK-V-NEXT: sub sp, sp, a1
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 18 * vlenb
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 4
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 4
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 14
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 13
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 12
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 11
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 10
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 3
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 6
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 2
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 1
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: lhu s0, 24(a0)
 ; CHECK-V-NEXT: lhu s1, 16(a0)
 ; CHECK-V-NEXT: lhu s2, 0(a0)
@@ -828,7 +1263,94 @@
 ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma
 ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 18
+; CHECK-V-NEXT: mul a0, a0, a1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
@@ -1403,9 +1925,96 @@
 ; CHECK-V-NEXT: .cfi_offset s5, -56
 ; CHECK-V-NEXT: .cfi_offset s6, -64
 ; CHECK-V-NEXT: csrr a1, vlenb
-; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: li a2, 18
+; CHECK-V-NEXT: mul a1, a1, a2
 ; CHECK-V-NEXT: sub sp, sp, a1
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 18 * vlenb
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 4
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 4
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 14
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 13
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 12
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 11
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 10
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 3
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 3
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 6
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 2
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 1
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: lhu s0, 56(a0)
 ; CHECK-V-NEXT: lhu s1, 48(a0)
 ; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -1491,7 +2100,94 @@
 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 18
+; CHECK-V-NEXT: mul a0, a0, a1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -1686,9 +2382,96 @@
 ; CHECK-V-NEXT: .cfi_offset s5, -56
 ; CHECK-V-NEXT: .cfi_offset s6, -64
 ; CHECK-V-NEXT: csrr a1, vlenb
-; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: li a2, 18
+; CHECK-V-NEXT: mul a1, a1, a2
 ; CHECK-V-NEXT: sub sp, sp, a1
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 18 * vlenb
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 4
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 4
+; CHECK-V-NEXT: sub a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 14
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: li a2, 13
+; CHECK-V-NEXT: mul a1, a1, a2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v4, (a1) #
Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 2 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -2076,7 +3033,94 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: 
addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 18 +; CHECK-V-NEXT: mul a0, a0, a1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -2191,9 +3235,94 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; 
CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2264,7 +3393,92 @@ ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add 
a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2325,9 +3539,94 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; 
CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2352,7 +3651,92 @@ ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: 
vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2437,9 +3821,94 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; 
CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2491,7 +3960,92 @@ ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: 
csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2599,9 +4153,94 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; 
CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2672,7 +4311,92 @@ ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul 
a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2733,9 +4457,94 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp 
+ 64 + 16 * vlenb +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2760,7 +4569,92 @@ ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, 
a0, 32 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2845,9 +4739,94 @@ ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: csrr a0, vlenb -; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: sub sp, sp, a0 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # 
sp + 64 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: addi a0, sp, 32 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma @@ -2899,7 +4878,92 @@ ; CHECK-V-NEXT: vmv.s.x v9, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 ; 
CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 32 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -3001,16 +5065,101 @@ ; ; CHECK-V-LABEL: stest_f16i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -32 -; CHECK-V-NEXT: .cfi_def_cfa_offset 32 -; CHECK-V-NEXT: sd ra, 24(sp) # 
8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 4 +; CHECK-V-NEXT: sub sp, sp, a2 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 14 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 13 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 12 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 11 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 10 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: sub a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 6 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 2 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 1 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 1 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; 
CHECK-V-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: addi a2, sp, 16 +; CHECK-V-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv s2, a1 ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt @@ -3075,11 +5224,95 @@ ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vmv.s.x v8, s0 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 32 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded 
Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -3129,16 +5362,101 @@ ; ; CHECK-V-LABEL: utesth_f16i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -32 -; CHECK-V-NEXT: .cfi_def_cfa_offset 32 -; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 4 +; CHECK-V-NEXT: sub sp, sp, a2 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 14 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 13 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 12 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 11 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 10 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: sub a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 6 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, 
a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 2 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 1 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 1 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: addi a2, sp, 16 +; CHECK-V-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv s0, a1 ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt @@ -3158,11 +5476,95 @@ ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vmv.s.x v8, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 32 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v 
v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptoui <2 x half> %x to <2 x i128> @@ -3236,16 +5638,101 @@ ; ; CHECK-V-LABEL: ustest_f16i64: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -32 -; CHECK-V-NEXT: .cfi_def_cfa_offset 32 -; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 4 +; CHECK-V-NEXT: sub sp, sp, a2 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 14 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 13 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 12 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 11 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 10 +; CHECK-V-NEXT: mul a2, a2, a3 +; 
CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: sub a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 6 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 2 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 1 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 1 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: addi a2, sp, 16 +; CHECK-V-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv s2, a1 ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt @@ -3292,11 +5779,95 @@ ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vmv.s.x v8, a2 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 32 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> @@ -3752,9 +6323,96 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: li a2, 18 +; CHECK-V-NEXT: mul a1, a1, a2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 18 * vlenb +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 4 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 4 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 14 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v3, (a1) # 
Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 13 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 2 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -3799,7 +6457,94 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi 
a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 18 +; CHECK-V-NEXT: mul a0, a0, a1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -3908,17 +6653,104 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: li a2, 18 +; CHECK-V-NEXT: mul a1, a1, a2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb -; CHECK-V-NEXT: lhu s0, 24(a0) -; CHECK-V-NEXT: lhu s1, 16(a0) -; CHECK-V-NEXT: lhu s2, 0(a0) -; CHECK-V-NEXT: lhu a0, 8(a0) -; CHECK-V-NEXT: fmv.w.x fa0, a0 -; CHECK-V-NEXT: call __extendhfsf2@plt -; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 18 * vlenb +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 4 +; CHECK-V-NEXT: add a1, 
sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 4 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 14 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 13 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 2 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: lhu s0, 24(a0) +; CHECK-V-NEXT: lhu s1, 16(a0) +; CHECK-V-NEXT: lhu s2, 0(a0) +; CHECK-V-NEXT: lhu a0, 8(a0) +; CHECK-V-NEXT: fmv.w.x fa0, a0 +; CHECK-V-NEXT: call __extendhfsf2@plt +; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz +; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill @@ -3954,7 +6786,94 @@ ; CHECK-V-NEXT: vsetvli 
zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 18 +; CHECK-V-NEXT: mul a0, a0, a1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -4074,9 +6993,96 @@ ; CHECK-V-NEXT: .cfi_offset s1, -24 ; 
CHECK-V-NEXT: .cfi_offset s2, -32 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: li a2, 18 +; CHECK-V-NEXT: mul a1, a1, a2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 18 * vlenb +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 4 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 4 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 14 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 13 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 2 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: add a1, sp, a1 +; 
CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-V-NEXT: lhu s0, 24(a0) ; CHECK-V-NEXT: lhu s1, 16(a0) ; CHECK-V-NEXT: lhu s2, 0(a0) @@ -4121,7 +7127,94 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 18 +; 
CHECK-V-NEXT: mul a0, a0, a1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -4684,9 +7777,96 @@ ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: li a2, 18 +; CHECK-V-NEXT: mul a1, a1, a2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 18 * vlenb +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 4 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 4 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 14 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 13 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 2 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: 
add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -4772,7 +7952,94 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: 
csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 18 +; CHECK-V-NEXT: mul a0, a0, a1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -4963,9 +8230,96 @@ ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: li a2, 18 +; CHECK-V-NEXT: mul a1, a1, a2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 18 * vlenb +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 4 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 4 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 14 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 13 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 2 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; 
CHECK-V-NEXT: slli a1, a1, 2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 1 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-V-NEXT: lhu s0, 56(a0) ; CHECK-V-NEXT: lhu s1, 48(a0) ; CHECK-V-NEXT: lhu s2, 40(a0) @@ -5049,7 +8403,94 @@ ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0 ; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 4 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; 
CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb ; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 18 +; CHECK-V-NEXT: mul a0, a0, a1 ; CHECK-V-NEXT: add sp, sp, a0 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -5265,9 +8706,96 @@ ; CHECK-V-NEXT: .cfi_offset s5, -56 ; CHECK-V-NEXT: .cfi_offset s6, -64 ; CHECK-V-NEXT: csrr a1, vlenb -; CHECK-V-NEXT: slli a1, a1, 1 +; CHECK-V-NEXT: li a2, 18 +; CHECK-V-NEXT: mul a1, a1, a2 ; CHECK-V-NEXT: sub sp, sp, a1 -; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 2 * vlenb +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xd0, 0x00, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 80 + 18 * vlenb +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 4 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 4 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 14 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 13 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 12 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 11 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 10 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: add a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a1, a1, 3 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: slli a2, a1, 3 +; CHECK-V-NEXT: sub a1, a2, a1 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a1, vlenb +; CHECK-V-NEXT: li a2, 6 +; CHECK-V-NEXT: mul a1, a1, a2 +; CHECK-V-NEXT: add a1, sp, a1 +; CHECK-V-NEXT: addi a1, a1, 16 +; CHECK-V-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr 
+; CHECK-V-NEXT: slli a2, a1, 2
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 2
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a2, a1, 1
+; CHECK-V-NEXT: add a1, a2, a1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a1, vlenb
+; CHECK-V-NEXT: slli a1, a1, 1
+; CHECK-V-NEXT: add a1, sp, a1
+; CHECK-V-NEXT: addi a1, a1, 16
+; CHECK-V-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: lhu s0, 56(a0)
 ; CHECK-V-NEXT: lhu s1, 48(a0)
 ; CHECK-V-NEXT: lhu s2, 40(a0)
@@ -5352,7 +8880,94 @@
 ; CHECK-V-NEXT: vsetvli zero, zero, e16, m1, ta, ma
 ; CHECK-V-NEXT: vnsrl.wi v8, v10, 0
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 18
+; CHECK-V-NEXT: mul a0, a0, a1
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
@@ -5468,9 +9083,94 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -5544,7 +9244,92 @@
 ; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -5603,9 +9388,94 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -5630,7 +9500,92 @@
 ; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -5703,9 +9658,94 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma
@@ -5745,7 +9785,92 @@
 ; CHECK-V-NEXT: vmv.s.x v9, a1
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -5854,9 +9979,94 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -5930,7 +10140,92 @@
 ; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -5989,9 +10284,94 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -6016,7 +10396,92 @@
 ; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -6089,9 +10554,94 @@
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: csrr a0, vlenb
-; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: sub sp, sp, a0
-; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 2 * vlenb
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xc0, 0x00, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 64 + 16 * vlenb
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: addi a0, sp, 32
 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
@@ -6131,7 +10681,92 @@
 ; CHECK-V-NEXT: vmv.s.x v9, a1
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
 ; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 4
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
 ; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 32
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
 ; CHECK-V-NEXT: add sp, sp, a0
 ; CHECK-V-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
 ; CHECK-V-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
@@ -6234,16 +10869,101 @@
 ;
 ; CHECK-V-LABEL: stest_f16i64_mm:
 ; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -32
-; CHECK-V-NEXT: .cfi_def_cfa_offset 32
-; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
 ; CHECK-V-NEXT: .cfi_offset ra, -8
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 4
+; CHECK-V-NEXT: sub sp, sp, a2
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 14
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 13
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 12
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 11
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 10
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 3
+; CHECK-V-NEXT: add a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 3
+; CHECK-V-NEXT: sub a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 6
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 2
+; CHECK-V-NEXT: add a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 1
+; CHECK-V-NEXT: add a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 1
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: addi a2, sp, 16
+; CHECK-V-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: mv s2, a1
 ; CHECK-V-NEXT: fmv.w.x fa0, a0
 ; CHECK-V-NEXT: call __extendhfsf2@plt
@@ -6311,11 +11031,95 @@
 ; CHECK-V-NEXT: vmv.s.x v9, a0
 ; CHECK-V-NEXT: vmv.s.x v8, s0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: addi sp, sp, 32
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 14
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 13
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 12
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 11
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 10
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 3
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 3
+; CHECK-V-NEXT: sub a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: li a1, 6
+; CHECK-V-NEXT: mul a0, a0, a1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 2
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 2
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a1, a0, 1
+; CHECK-V-NEXT: add a0, a1, a0
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 1
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: add a0, sp, a0
+; CHECK-V-NEXT: addi a0, a0, 16
+; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: addi a0, sp, 16
+; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-V-NEXT: csrr a0, vlenb
+; CHECK-V-NEXT: slli a0, a0, 4
+; CHECK-V-NEXT: add sp, sp, a0
+; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
+; CHECK-V-NEXT: addi sp, sp, 48
 ; CHECK-V-NEXT: ret
 entry:
   %conv = fptosi <2 x half> %x to <2 x i128>
@@ -6362,16 +11166,101 @@
 ;
 ; CHECK-V-LABEL: utesth_f16i64_mm:
 ; CHECK-V: # %bb.0: # %entry
-; CHECK-V-NEXT: addi sp, sp, -32
-; CHECK-V-NEXT: .cfi_def_cfa_offset 32
-; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: addi sp, sp, -48
+; CHECK-V-NEXT: .cfi_def_cfa_offset 48
+; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
+; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
 ; CHECK-V-NEXT: .cfi_offset ra, -8
 ; CHECK-V-NEXT: .cfi_offset s0, -16
 ; CHECK-V-NEXT: .cfi_offset s1, -24
 ; CHECK-V-NEXT: .cfi_offset s2, -32
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 4
+; CHECK-V-NEXT: sub sp, sp, a2
+; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 14
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 13
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 12
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 11
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 10
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 3
+; CHECK-V-NEXT: add a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 3
+; CHECK-V-NEXT: sub a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: li a3, 6
+; CHECK-V-NEXT: mul a2, a2, a3
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 2
+; CHECK-V-NEXT: add a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a3, a2, 1
+; CHECK-V-NEXT: add a2, a3, a2
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: slli a2, a2, 1
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: csrr a2, vlenb
+; CHECK-V-NEXT: add a2, sp, a2
+; CHECK-V-NEXT: addi a2, a2, 16
+; CHECK-V-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-V-NEXT: addi a2, sp, 16
+; CHECK-V-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-V-NEXT: mv s0, a0
 ; CHECK-V-NEXT: fmv.w.x fa0, a1
 ; CHECK-V-NEXT: call __extendhfsf2@plt
@@ -6391,11 +11280,95 @@
 ; CHECK-V-NEXT: vmv.s.x v9, a1
 ; CHECK-V-NEXT: vmv.s.x v8, a0
 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1
-; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
-; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 32 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptoui <2 x half> 
%x to <2 x i128> @@ -6457,16 +11430,101 @@ ; ; CHECK-V-LABEL: ustest_f16i64_mm: ; CHECK-V: # %bb.0: # %entry -; CHECK-V-NEXT: addi sp, sp, -32 -; CHECK-V-NEXT: .cfi_def_cfa_offset 32 -; CHECK-V-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; CHECK-V-NEXT: sd s2, 0(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: addi sp, sp, -48 +; CHECK-V-NEXT: .cfi_def_cfa_offset 48 +; CHECK-V-NEXT: sd ra, 40(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s0, 32(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s1, 24(sp) # 8-byte Folded Spill +; CHECK-V-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; CHECK-V-NEXT: .cfi_offset ra, -8 ; CHECK-V-NEXT: .cfi_offset s0, -16 ; CHECK-V-NEXT: .cfi_offset s1, -24 ; CHECK-V-NEXT: .cfi_offset s2, -32 +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 4 +; CHECK-V-NEXT: sub sp, sp, a2 +; CHECK-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 14 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 13 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 12 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 11 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 10 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 3 +; CHECK-V-NEXT: sub a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: li a3, 6 +; CHECK-V-NEXT: mul a2, a2, a3 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 2 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a3, a2, 1 +; CHECK-V-NEXT: add a2, a3, a2 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: 
addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: slli a2, a2, 1 +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: csrr a2, vlenb +; CHECK-V-NEXT: add a2, sp, a2 +; CHECK-V-NEXT: addi a2, a2, 16 +; CHECK-V-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-V-NEXT: addi a2, sp, 16 +; CHECK-V-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-V-NEXT: mv s2, a1 ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2@plt @@ -6502,11 +11560,95 @@ ; CHECK-V-NEXT: vmv.s.x v9, a0 ; CHECK-V-NEXT: vmv.s.x v8, a1 ; CHECK-V-NEXT: vslideup.vi v8, v9, 1 -; CHECK-V-NEXT: ld ra, 24(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s0, 16(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s1, 8(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: ld s2, 0(sp) # 8-byte Folded Reload -; CHECK-V-NEXT: addi sp, sp, 32 +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 14 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 13 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 12 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 11 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 10 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 3 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 3 +; CHECK-V-NEXT: sub a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: li a1, 6 +; CHECK-V-NEXT: mul a0, a0, a1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 2 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 2 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a1, a0, 1 +; CHECK-V-NEXT: add a0, a1, a0 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v28, (a0) # 
Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 1 +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: add a0, sp, a0 +; CHECK-V-NEXT: addi a0, a0, 16 +; CHECK-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: addi a0, sp, 16 +; CHECK-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-V-NEXT: csrr a0, vlenb +; CHECK-V-NEXT: slli a0, a0, 4 +; CHECK-V-NEXT: add sp, sp, a0 +; CHECK-V-NEXT: ld ra, 40(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s0, 32(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s1, 24(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: ld s2, 16(sp) # 8-byte Folded Reload +; CHECK-V-NEXT: addi sp, sp, 48 ; CHECK-V-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> diff --git a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll --- a/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fptosi-sat.ll @@ -182,6 +182,29 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: lui a0, %hi(.LCPI11_1) @@ -197,6 +220,27 @@ ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmerge.vim v8, v16, 0, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i16( %f) ret %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -213,9 +213,56 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; 
CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -229,7 +276,54 @@ ; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -244,9 +338,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -260,7 +401,54 @@
 ; CHECK-NEXT: vsll.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -445,9 +633,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -461,7 +696,54 @@
 ; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -476,9 +758,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -492,7 +821,54 @@
 ; CHECK-NEXT: vsll.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -651,9 +1027,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -669,7 +1092,54 @@
 ; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -684,9 +1154,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -703,7 +1220,54 @@
 ; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -826,9 +1390,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: slli a2, a2, 3
-; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -844,7 +1455,54 @@
 ; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -859,9 +1517,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -878,7 +1583,54 @@
 ; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -893,9 +1645,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -911,7 +1710,54 @@
 ; CHECK-NEXT: vsll.vv v8, v24, v8, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -926,9 +1772,56 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: slli a2, a2, 4
 ; CHECK-NEXT: sub sp, sp, a2
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 4
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 14
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 13
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 12
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 11
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 10
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -945,7 +1838,54 @@
 ; CHECK-NEXT: vsrl.vv v16, v24, v16, v0.t
 ; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -960,10 +1900,16 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 56
+; CHECK-NEXT: li a3, 58
 ; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 56
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: li a3, 24
@@ -1136,6 +2082,12 @@
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: li a1, 56
 ; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 58
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -1150,10 +2102,16 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 56
+; CHECK-NEXT: li a3, 58
 ; CHECK-NEXT: mul a1, a1, a3
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 56
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: li a3, 24
@@ -1324,6 +2282,12 @@
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: li a1, 56
 ; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 58
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll b/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll
--- a/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/inline-asm.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s \
 ; RUN: --verify-machineinstrs | FileCheck %s
 define @test_1xi1( %in, %in2) nounwind {
@@ -365,12 +365,32 @@
 define @test_specify_reg_mf2( %in, %in2) nounwind {
 ; CHECK-LABEL: test_specify_reg_mf2:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v2, v9
 ; CHECK-NEXT: vmv1r.v v1, v8
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: vadd.vv v0, v1, v2
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call asm "vadd.vv $0, $1, $2", "={v0},{v1},{v2}"( %in, %in2)
@@ -380,12 +400,32 @@
 define @test_specify_reg_m1( %in, %in2) nounwind {
 ; CHECK-LABEL: test_specify_reg_m1:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v2, v9
 ; CHECK-NEXT: vmv1r.v v1, v8
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: vadd.vv v0, v1, v2
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: vmv1r.v v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call asm "vadd.vv $0, $1, $2", "={v0},{v1},{v2}"( %in, %in2)
@@ -395,12 +435,66 @@
 define @test_specify_reg_m2( %in, %in2) nounwind {
 ; CHECK-LABEL: test_specify_reg_m2:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v4, v10
 ; CHECK-NEXT: vmv2r.v v2, v8
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: vadd.vv v0, v2, v4
 ; CHECK-NEXT: #NO_APP
 ; CHECK-NEXT: vmv2r.v v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call asm "vadd.vv $0, $1, $2", "={v0},{v2},{v4}"( %in, %in2)
@@ -410,11 +504,31 @@
 define @test_specify_reg_mask( %in, %in2) nounwind {
 ; CHECK-LABEL: test_specify_reg_mask:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v2, v8
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: #APP
 ; CHECK-NEXT: vmand.mm v0, v1, v2
 ; CHECK-NEXT: #NO_APP
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call asm "vmand.mm $0, $1, $2", "={v0},{v1},{v2}"( %in, %in2)
diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
--- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll
@@ -12,10 +12,100 @@
 define @callee( %arg0, %arg1, %arg2) {
 ; RV64IV-LABEL: callee:
 ; RV64IV: # %bb.0:
+; RV64IV-NEXT: addi sp, sp, -16
+; RV64IV-NEXT: .cfi_def_cfa_offset 16
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: slli a1, a1, 3
+; RV64IV-NEXT: sub sp, sp, a1
+; RV64IV-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: slli a2, a1, 3
+; RV64IV-NEXT: sub a1, a2, a1
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: li a2, 6
+; RV64IV-NEXT: mul a1, a1, a2
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: slli a2, a1, 2
+; RV64IV-NEXT: add a1, a2, a1
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: slli a1, a1, 2
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: slli a2, a1, 1
+; RV64IV-NEXT: add a1, a2, a1
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: slli a1, a1, 1
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: csrr a1, vlenb
+; RV64IV-NEXT: add a1, sp, a1
+; RV64IV-NEXT: addi a1, a1, 16
+; RV64IV-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64IV-NEXT: addi a1, sp, 16
+; RV64IV-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64IV-NEXT: vl8r.v v24, (a0)
 ; RV64IV-NEXT: li a0, 1024
 ; RV64IV-NEXT: vsetvli zero, a0, e8, m8, tu, ma
 ; RV64IV-NEXT: vmacc.vv v8, v16, v24
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: slli a1, a0, 3
+; RV64IV-NEXT: sub a0, a1, a0
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: li a1, 6
+; RV64IV-NEXT: mul a0, a0, a1
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: slli a1, a0, 2
+; RV64IV-NEXT: add a0, a1, a0
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: slli a0, a0, 2
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: slli a1, a0, 1
+; RV64IV-NEXT: add a0, a1, a0
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: slli a0, a0, 1
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+; RV64IV-NEXT: add a0, sp, a0
+; RV64IV-NEXT: addi a0, a0, 16
+; RV64IV-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: addi a0, sp, 16
+; RV64IV-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64IV-NEXT: csrr a0, vlenb
+;
RV64IV-NEXT: slli a0, a0, 3 +; RV64IV-NEXT: add sp, sp, a0 +; RV64IV-NEXT: addi sp, sp, 16 ; RV64IV-NEXT: ret %ret = call @llvm.riscv.vmacc.nxv64i8.nxv64i8( %arg0, @@ -36,10 +126,58 @@ ; RV64IV-NEXT: addi s0, sp, 80 ; RV64IV-NEXT: .cfi_def_cfa s0, 0 ; RV64IV-NEXT: csrr a0, vlenb -; RV64IV-NEXT: slli a0, a0, 5 +; RV64IV-NEXT: li a1, 40 +; RV64IV-NEXT: mul a0, a0, a1 ; RV64IV-NEXT: sub sp, sp, a0 ; RV64IV-NEXT: andi sp, sp, -64 ; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 39 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 38 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 37 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 36 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 35 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 34 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a1, a0, 5 +; RV64IV-NEXT: add a0, a1, a0 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 5 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV64IV-NEXT: csrr a0, vlenb ; RV64IV-NEXT: li a1, 24 ; RV64IV-NEXT: mul a0, a0, a1 ; RV64IV-NEXT: add a0, sp, a0 @@ -59,6 +197,53 @@ ; RV64IV-NEXT: addi a0, sp, 64 ; RV64IV-NEXT: vs8r.v v24, (a1) ; RV64IV-NEXT: call callee@plt +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 39 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 38 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 37 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 36 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 35 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: li a1, 34 +; RV64IV-NEXT: mul a0, a0, a1 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; 
RV64IV-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a1, a0, 5 +; RV64IV-NEXT: add a0, a1, a0 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64IV-NEXT: csrr a0, vlenb +; RV64IV-NEXT: slli a0, a0, 5 +; RV64IV-NEXT: add a0, sp, a0 +; RV64IV-NEXT: addi a0, a0, 64 +; RV64IV-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV64IV-NEXT: addi sp, s0, -80 ; RV64IV-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64IV-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -988,11 +988,101 @@ ; ; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size 
Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs %v = call @llvm.masked.gather.nxv8i64.nxv8p0( %ptrs, i32 8, %m, %passthru) @@ -1012,11 +1102,101 @@ ; ; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf8 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: 
add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -1053,11 +1233,101 @@ ; ; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; 
RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs %v = call @llvm.masked.gather.nxv8i64.nxv8p0( %ptrs, i32 8, %m, %passthru) @@ -1077,11 +1347,101 @@ ; ; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1117,11 +1477,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf2 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
 %v = call <vscale x 8 x i64> @llvm.masked.gather.nxv8i64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru)
@@ -1140,11 +1590,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf2 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1164,11 +1704,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vzext.vf2 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1179,12 +1809,56 @@
 define <vscale x 8 x i64> @mgather_baseidx_nxv8i64(ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, <vscale x 8 x i64> %passthru) {
 ; RV32-LABEL: mgather_baseidx_nxv8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
 ; RV32-NEXT: vnsrl.wi v24, v8, 0
 ; RV32-NEXT: vsll.vi v8, v24, 3
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t
 ; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_baseidx_nxv8i64:
@@ -1207,6 +1881,52 @@
 define void @mgather_nxv16i64( %ptrs0, %ptrs1, %m, %passthru0, %passthru1, * %out) {
 ; RV32-LABEL: mgather_nxv16i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: sub a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 6
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT: vl8re64.v v24, (a0)
 ; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, mu
 ; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t
@@ -1220,6 +1940,50 @@
 ; RV32-NEXT: add a0, a1, a0
 ; RV32-NEXT: vs8r.v v24, (a0)
 ; RV32-NEXT: vs8r.v v16, (a1)
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: mgather_nxv16i64:
@@ -1227,9 +1991,56 @@
 ; RV64-NEXT: addi sp, sp, -16
 ; RV64-NEXT: .cfi_def_cfa_offset 16
 ; RV64-NEXT: csrr a3, vlenb
-; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: slli a3, a3, 4
 ; RV64-NEXT: sub sp, sp, a3
-; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a4, a3, 4
+; RV64-NEXT: sub a3, a4, a3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 14
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 13
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 12
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 10
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: add a3, a4, a3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT: vl8re64.v v24, (a0)
 ; RV64-NEXT: addi a0, sp, 16
 ; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -1250,7 +2061,54 @@
 ; RV64-NEXT: vs8r.v v8, (a0)
 ; RV64-NEXT: vs8r.v v24, (a2)
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 14
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 13
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -1855,11 +2713,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_nxv8i8_nxv8f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf8 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i8> %idxs
 %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
@@ -1879,11 +2827,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_sext_nxv8i8_nxv8f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf8 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1920,11 +2958,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_nxv8i16_nxv8f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf4 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i16> %idxs
 %v = call <vscale x 8 x double> @llvm.masked.gather.nxv8f64.nxv8p0(<vscale x 8 x ptr> %ptrs, i32 8, <vscale x 8 x i1> %m, <vscale x 8 x double> %passthru)
@@ -1944,11 +3072,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_sext_nxv8i16_nxv8f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf4 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1984,11 +3202,101 @@
 ;
 ; RV64-LABEL: mgather_baseidx_nxv8i32_nxv8f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 3
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 3
+; RV64-NEXT: sub a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: li a2, 6
+; RV64-NEXT: mul a1, a1, a2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 2
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 2
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a2, a1, 1
+; RV64-NEXT: add a1, a2, a1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu
 ; RV64-NEXT: vsext.vf2 v24, v8
 ; RV64-NEXT: vsll.vi v8, v24, 3
 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t
 ; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size
Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs %v = call @llvm.masked.gather.nxv8f64.nxv8p0( %ptrs, i32 8, %m, %passthru) @@ -2007,11 +3315,101 @@ ; ; RV64-LABEL: mgather_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vsext.vf2 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2031,11 +3429,101 @@ ; ; RV64-LABEL: mgather_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu ; RV64-NEXT: vzext.vf2 v24, v8 ; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: vluxei64.v v16, (a0), v8, v0.t ; RV64-NEXT: vmv.v.v v8, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli 
a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2046,12 +3534,56 @@ define @mgather_baseidx_nxv8f64(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv8f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v8, 0 ; RV32-NEXT: vsll.vi v8, v24, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_baseidx_nxv8f64: @@ -2106,6 +3638,52 @@ define @mgather_baseidx_nxv32i8(ptr %base, %idxs, %m, %passthru) { ; RV32-LABEL: mgather_baseidx_nxv32i8: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; 
RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v16, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: srli a1, a1, 2 @@ -2121,10 +3699,100 @@ ; RV32-NEXT: vmv1r.v v0, v16 ; RV32-NEXT: vluxei32.v v12, (a0), v24, v0.t ; RV32-NEXT: vmv4r.v v8, v12 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mgather_baseidx_nxv32i8: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, 
a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v16, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a2, a1, 2 @@ -2154,6 +3822,50 @@ ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v15, (a0), v16, v0.t ; RV64-NEXT: vmv4r.v v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i8, ptr %base, %idxs %v = call @llvm.masked.gather.nxv32i8.nxv32p0( %ptrs, i32 2, %m, %passthru) diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -781,10 +781,100 @@ ; ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs call void @llvm.masked.scatter.nxv8i64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -803,10 +893,100 @@ ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -841,10 +1021,100 @@ ; ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; 
RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs call void @llvm.masked.scatter.nxv8i64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -863,10 +1133,100 @@ ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb 
+; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -900,10 +1260,100 @@ ; ; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr 
a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds i64, ptr %base, %idxs call void @llvm.masked.scatter.nxv8i64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -921,10 +1371,100 @@ ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, 
-16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -943,10 +1483,100 @@ ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: 
addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds i64, ptr %base, %eidxs @@ -957,11 +1587,55 @@ define void @mscatter_baseidx_nxv8i64( %val, ptr %base, %idxs, %m) { ; 
RV32-LABEL: mscatter_baseidx_nxv8i64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vsll.vi v16, v24, 3 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv8i64: @@ -1503,10 +2177,100 @@ ; ; RV64-LABEL: mscatter_baseidx_nxv8i8_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; 
RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.masked.scatter.nxv8f64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -1525,10 +2289,100 @@ ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i8_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, 
(a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1563,10 +2417,100 @@ ; ; RV64-LABEL: mscatter_baseidx_nxv8i16_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: 
vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.masked.scatter.nxv8f64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -1585,10 +2529,100 @@ ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; 
RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1622,10 +2656,100 @@ ; ; RV64-LABEL: mscatter_baseidx_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli 
a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.masked.scatter.nxv8f64.nxv8p0( %val, %ptrs, i32 8, %m) @@ -1643,10 +2767,100 @@ ; ; RV64-LABEL: mscatter_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # 
Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1665,10 +2879,100 @@ ; ; RV64-LABEL: mscatter_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 3 +; RV64-NEXT: sub a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a2, 6 +; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 2 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 2 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a2, a1, 1 +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: 
vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vzext.vf2 v24, v16
; RV64-NEXT: vsll.vi v16, v24, 3
; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 8 x i64> %eidxs
@@ -1679,11 +2983,55 @@
define void @mscatter_baseidx_nxv8f64(<vscale x 8 x double> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m) {
; RV32-LABEL: mscatter_baseidx_nxv8f64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
; RV32-NEXT: vnsrl.wi v24, v16, 0
; RV32-NEXT: vsll.vi v16, v24, 3
; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: mscatter_baseidx_nxv8f64:
@@ -1705,6 +3053,52 @@
define void @mscatter_nxv16f64(<vscale x 8 x double> %val0, <vscale x 8 x double> %val1, <vscale x 8 x ptr> %ptrs0, <vscale x 8 x ptr> %ptrs1, <vscale x 16 x i1> %m) {
; RV32-LABEL: mscatter_nxv16f64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 3
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 3
+; RV32-NEXT: sub a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: li a3, 6
+; RV32-NEXT: mul a2, a2, a3
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 2
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; RV32-NEXT: vl4re32.v v24, (a0)
; RV32-NEXT: vl4re32.v v28, (a1)
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
@@ -1715,6 +3109,50 @@
; RV32-NEXT: vslidedown.vx v0, v0, a0
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; 
RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_nxv16f64: @@ -1722,9 +3160,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: slli a2, a2, 4 ; RV64-NEXT: sub sp, sp, a2 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 4 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vl8re64.v v24, (a0) ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill @@ -1740,7 +3225,54 @@ ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (zero), v16, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v 
v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -1755,6 +3287,64 @@ define void @mscatter_baseidx_nxv16i8_nxv16f64( %val0, %val1, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 10 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: 
addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vl2r.v v2, (a1) ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v24, v2 @@ -1767,10 +3357,124 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv16i8_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: 
add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vl2r.v v2, (a1) ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v2 @@ -1784,6 +3488,62 @@ ; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs %v0 = call @llvm.vector.insert.nxv8f64.nxv16f64( undef, %val0, i64 0) @@ -1795,6 +3555,76 @@ define void @mscatter_baseidx_nxv16i16_nxv16f64( %val0, %val1, ptr %base, %idxs, %m) { ; RV32-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 12 +; RV32-NEXT: mul a2, a2, 
a3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 12 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 11 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 10 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vl4re16.v v4, (a1) ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v24, v4 @@ -1807,10 +3637,148 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: 
vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: mscatter_baseidx_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 12 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded 
Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; RV64-NEXT: vl4re16.v v4, (a1)
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsext.vf4 v24, v4
@@ -1824,6 +3792,74 @@
; RV64-NEXT: vslidedown.vx v0, v0, a1
; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i16> %idxs
%v0 = call <vscale x 16 x double> @llvm.vector.insert.nxv8f64.nxv16f64(<vscale x 16 x double> undef, <vscale x 8 x double> %val0, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -495,6 +495,52 @@
define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i1:
; RV32-BITS-UNKNOWN: # %bb.0:
+; 
RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 6 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vid.v v8 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb @@ -510,10 +556,100 @@ ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 6 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: 
add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: add sp, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv64i1: ; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -16 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: sub sp, sp, a0 +; RV32-BITS-256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 6 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: addi a0, sp, 16 +; RV32-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-256-NEXT: vmv.v.i v8, 0 ; RV32-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 @@ -525,10 +661,100 @@ ; RV32-BITS-256-NEXT: vrgather.vv v24, v8, 
v16 ; RV32-BITS-256-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 6 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: addi a0, sp, 16 +; RV32-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: add sp, sp, a0 +; RV32-BITS-256-NEXT: addi sp, sp, 16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv64i1: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -16 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 3 +; RV32-BITS-512-NEXT: sub sp, sp, a0 +; RV32-BITS-512-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: sub a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 6 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: add a0, a1, a0 
+; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: addi a0, sp, 16 +; RV32-BITS-512-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-512-NEXT: vid.v v8 ; RV32-BITS-512-NEXT: csrr a0, vlenb @@ -544,10 +770,100 @@ ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV32-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 3 +; RV32-BITS-512-NEXT: sub a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 6 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 2 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: addi a0, sp, 16 +; RV32-BITS-512-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 3 +; RV32-BITS-512-NEXT: add sp, sp, a0 +; RV32-BITS-512-NEXT: addi sp, sp, 16 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i1: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; 
RV64-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 6 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v8 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb @@ -563,10 +879,100 @@ ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-UNKNOWN-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 6 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 
16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: add sp, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV64-BITS-UNKNOWN-NEXT: ret ; ; RV64-BITS-256-LABEL: reverse_nxv64i1: ; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -16 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: sub sp, sp, a0 +; RV64-BITS-256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 6 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: addi a0, sp, 16 +; RV64-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-256-NEXT: vmv.v.i v8, 0 ; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0 @@ -578,10 +984,100 @@ ; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16 ; RV64-BITS-256-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-256-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 6 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v25, (a0) # 
Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: addi a0, sp, 16 +; RV64-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: add sp, sp, a0 +; RV64-BITS-256-NEXT: addi sp, sp, 16 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv64i1: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -16 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 3 +; RV64-BITS-512-NEXT: sub sp, sp, a0 +; RV64-BITS-512-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 3 +; RV64-BITS-512-NEXT: sub a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 6 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 2 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: 
addi a0, sp, 16 +; RV64-BITS-512-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v8 ; RV64-BITS-512-NEXT: csrr a0, vlenb @@ -597,6 +1093,50 @@ ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV64-BITS-512-NEXT: vand.vi v8, v24, 1 ; RV64-BITS-512-NEXT: vmsne.vi v0, v8, 0 +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 3 +; RV64-BITS-512-NEXT: sub a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 6 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 2 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: addi a0, sp, 16 +; RV64-BITS-512-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 3 +; RV64-BITS-512-NEXT: add sp, sp, a0 +; RV64-BITS-512-NEXT: addi sp, sp, 16 ; RV64-BITS-512-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv64i1( %a) ret %res @@ -1071,6 +1611,52 @@ define @reverse_nxv64i8( %a) { ; RV32-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV32-BITS-UNKNOWN: # %bb.0: +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -16 +; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 6 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: 
add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; RV32-BITS-UNKNOWN-NEXT: vid.v v16 ; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb @@ -1081,10 +1667,100 @@ ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 ; RV32-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24 ; RV32-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16 +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 6 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; 
RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV32-BITS-UNKNOWN-NEXT: add sp, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 16 ; RV32-BITS-UNKNOWN-NEXT: ret ; ; RV32-BITS-256-LABEL: reverse_nxv64i8: ; RV32-BITS-256: # %bb.0: +; RV32-BITS-256-NEXT: addi sp, sp, -16 +; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: sub sp, sp, a0 +; RV32-BITS-256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 6 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: addi a0, sp, 16 +; RV32-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; RV32-BITS-256-NEXT: vid.v v16 ; RV32-BITS-256-NEXT: csrr a0, vlenb @@ -1093,10 +1769,77 @@ ; RV32-BITS-256-NEXT: vrsub.vx v24, v16, a0 ; RV32-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV32-BITS-256-NEXT: vmv.v.v v8, v16 +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 3 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 6 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 2 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 2 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; 
RV32-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 1 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 16 +; RV32-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: addi a0, sp, 16 +; RV32-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a0, a0, 3 +; RV32-BITS-256-NEXT: add sp, sp, a0 +; RV32-BITS-256-NEXT: addi sp, sp, 16 ; RV32-BITS-256-NEXT: ret ; ; RV32-BITS-512-LABEL: reverse_nxv64i8: ; RV32-BITS-512: # %bb.0: +; RV32-BITS-512-NEXT: addi sp, sp, -16 +; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: sub sp, sp, a0 +; RV32-BITS-512-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: addi a0, sp, 16 +; RV32-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; RV32-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV32-BITS-512-NEXT: vid.v v16 ; RV32-BITS-512-NEXT: csrr a0, vlenb @@ -1106,10 +1849,77 @@ ; RV32-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV32-BITS-512-NEXT: vrgather.vv v16, v12, v24 ; RV32-BITS-512-NEXT: vmv8r.v v8, v16 +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 1 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 16 +; RV32-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: addi a0, sp, 16 +; RV32-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a0, a0, 2 +; RV32-BITS-512-NEXT: add sp, sp, a0 +; RV32-BITS-512-NEXT: addi sp, sp, 16 ; RV32-BITS-512-NEXT: ret ; ; RV64-BITS-UNKNOWN-LABEL: reverse_nxv64i8: ; RV64-BITS-UNKNOWN: # %bb.0: +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 
-16 +; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 6 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-BITS-UNKNOWN-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v16 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb @@ -1120,22 +1930,179 @@ ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v20, v8, v24 ; RV64-BITS-UNKNOWN-NEXT: vrgatherei16.vv v16, v12, v24 ; RV64-BITS-UNKNOWN-NEXT: vmv8r.v v8, v16 -; RV64-BITS-UNKNOWN-NEXT: ret -; -; RV64-BITS-256-LABEL: reverse_nxv64i8: -; RV64-BITS-256: # %bb.0: -; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-BITS-256-NEXT: vid.v v16 -; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: slli a0, a0, 3 -; RV64-BITS-256-NEXT: addi a0, a0, -1 -; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 -; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 6 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: 
csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: addi a0, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3 +; RV64-BITS-UNKNOWN-NEXT: add sp, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 16 +; RV64-BITS-UNKNOWN-NEXT: ret +; +; RV64-BITS-256-LABEL: reverse_nxv64i8: +; RV64-BITS-256: # %bb.0: +; RV64-BITS-256-NEXT: addi sp, sp, -16 +; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: sub sp, sp, a0 +; RV64-BITS-256-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 6 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; 
RV64-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: addi a0, sp, 16 +; RV64-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: vsetvli a0, zero, e8, m8, ta, ma +; RV64-BITS-256-NEXT: vid.v v16 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: addi a0, a0, -1 +; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0 +; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v24 ; RV64-BITS-256-NEXT: vmv.v.v v8, v16 +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 3 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 6 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 2 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 2 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 1 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 16 +; RV64-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: addi a0, sp, 16 +; RV64-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a0, a0, 3 +; RV64-BITS-256-NEXT: add sp, sp, a0 +; RV64-BITS-256-NEXT: addi sp, sp, 16 ; RV64-BITS-256-NEXT: ret ; ; RV64-BITS-512-LABEL: reverse_nxv64i8: ; RV64-BITS-512: # %bb.0: +; RV64-BITS-512-NEXT: addi sp, sp, -16 +; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 16 +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: sub sp, sp, a0 +; RV64-BITS-512-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; 
RV64-BITS-512-NEXT: addi a0, sp, 16 +; RV64-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; RV64-BITS-512-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; RV64-BITS-512-NEXT: vid.v v16 ; RV64-BITS-512-NEXT: csrr a0, vlenb @@ -1145,6 +2112,27 @@ ; RV64-BITS-512-NEXT: vrgather.vv v20, v8, v24 ; RV64-BITS-512-NEXT: vrgather.vv v16, v12, v24 ; RV64-BITS-512-NEXT: vmv8r.v v8, v16 +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 1 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 16 +; RV64-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: addi a0, sp, 16 +; RV64-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a0, a0, 2 +; RV64-BITS-512-NEXT: add sp, sp, a0 +; RV64-BITS-512-NEXT: addi sp, sp, 16 ; RV64-BITS-512-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv64i8( %a) ret %res @@ -1232,6 +2220,52 @@ define @reverse_nxv32i16( %a) { ; CHECK-LABEL: reverse_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb @@ -1240,6 +2274,50 @@ ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: 
sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv32i16( %a) ret %res @@ -1311,6 +2389,52 @@ define @reverse_nxv16i32( %a) { ; CHECK-LABEL: reverse_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: csrr a0, vlenb @@ -1319,6 +2443,50 @@ ; 
CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv16i32( %a) ret %res @@ -1375,6 +2543,52 @@ define @reverse_nxv8i64( %a) { ; CHECK-LABEL: reverse_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma @@ -1382,6 +2596,50 @@ ; CHECK-NEXT: vrsub.vx v24, v16, a0 ; CHECK-NEXT: vrgather.vv v16, v8, v24 ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %res = call @llvm.experimental.vector.reverse.nxv8i64( %a) ret %res @@ -1473,6 +2731,52 @@ define @reverse_nxv32f16( %a) { ; CHECK-LABEL: reverse_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill 
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: csrr a0, vlenb
@@ -1481,6 +2785,50 @@
; CHECK-NEXT: vrsub.vx v24, v16, a0
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.reverse.nxv32f16( %a)
ret %res
@@ -1552,6 +2900,52 @@ define @reverse_nxv16f32( %a) {
; CHECK-LABEL: reverse_nxv16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vid.v v16
; CHECK-NEXT: csrr a0, vlenb
@@ -1560,6 +2954,50 @@
; CHECK-NEXT: vrsub.vx v24, v16, a0
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.reverse.nxv16f32( %a)
ret %res
@@ -1616,6 +3054,52 @@ define @reverse_nxv8f64( %a) {
; CHECK-LABEL: reverse_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
@@ -1623,6 +3107,50 @@
; CHECK-NEXT: vrsub.vx v24, v16, a0
; CHECK-NEXT: vrgather.vv v16, v8, v24
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.reverse.nxv8f64( %a)
ret %res
@@ -1651,6 +3179,52 @@ define @reverse_nxv6i64( %a) {
; CHECK-LABEL: reverse_nxv6i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: addi a0, a0, -1
; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
@@ -1660,6 +3234,50 @@
; CHECK-NEXT: vmv2r.v v8, v26
; CHECK-NEXT: vmv2r.v v10, v28
; CHECK-NEXT: vmv2r.v v12, v30
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.reverse.nxv6i64( %a)
ret %res
@@ -1677,10 +3295,100 @@
; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 80
; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 4
+; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 5
; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0
; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64
; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5
+; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 30
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 29
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 28
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 27
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 26
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 25
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 24
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 23
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 22
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 21
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 20
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 19
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 18
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4
+; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV32-BITS-UNKNOWN-NEXT: addi a1, a0, -1
; RV32-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma
; RV32-BITS-UNKNOWN-NEXT: vid.v v24
@@ -1696,6 +3404,96 @@
; RV32-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1)
; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0)
; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1)
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 5
+; RV32-BITS-UNKNOWN-NEXT: sub a0, a1, a0
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 30
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 29
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 28
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 27
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 26
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 25
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 24
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 23
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 22
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 21
+; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1
+; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0
+; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64
+; RV32-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
+; RV32-BITS-UNKNOWN-NEXT: li a1, 20
+;
RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 19 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: li a1, 18 +; RV32-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV32-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV32-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV32-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV32-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -80 ; RV32-BITS-UNKNOWN-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-BITS-UNKNOWN-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -1713,10 +3511,100 @@ ; RV32-BITS-256-NEXT: addi s0, sp, 80 ; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0 ; RV32-BITS-256-NEXT: csrr a0, vlenb -; RV32-BITS-256-NEXT: slli a0, a0, 4 +; RV32-BITS-256-NEXT: slli a0, a0, 5 ; RV32-BITS-256-NEXT: sub sp, sp, a0 ; RV32-BITS-256-NEXT: andi sp, sp, -64 ; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 5 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 30 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 29 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 28 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 27 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 26 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 25 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 24 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: 
csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 23 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 22 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 21 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 20 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 19 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 18 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 4 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV32-BITS-256-NEXT: csrr a0, vlenb ; RV32-BITS-256-NEXT: addi a1, a0, -1 ; RV32-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-BITS-256-NEXT: vid.v v24 @@ -1732,6 +3620,96 @@ ; RV32-BITS-256-NEXT: vs8r.v v16, (a1) ; RV32-BITS-256-NEXT: vl8re64.v v16, (a0) ; RV32-BITS-256-NEXT: vl8re64.v v8, (a1) +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 5 +; RV32-BITS-256-NEXT: sub a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 30 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 29 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 28 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 27 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 26 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: 
vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 25 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 24 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 23 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 22 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 21 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 20 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 19 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: li a1, 18 +; RV32-BITS-256-NEXT: mul a0, a0, a1 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-256-NEXT: csrr a0, vlenb +; RV32-BITS-256-NEXT: slli a1, a0, 4 +; RV32-BITS-256-NEXT: add a0, a1, a0 +; RV32-BITS-256-NEXT: add a0, sp, a0 +; RV32-BITS-256-NEXT: addi a0, a0, 64 +; RV32-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV32-BITS-256-NEXT: addi sp, s0, -80 ; RV32-BITS-256-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-BITS-256-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -1749,10 +3727,100 @@ ; RV32-BITS-512-NEXT: addi s0, sp, 80 ; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0 ; RV32-BITS-512-NEXT: csrr a0, vlenb -; RV32-BITS-512-NEXT: slli a0, a0, 4 +; RV32-BITS-512-NEXT: slli a0, a0, 5 ; RV32-BITS-512-NEXT: sub sp, sp, a0 ; RV32-BITS-512-NEXT: andi sp, sp, -64 ; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 5 +; RV32-BITS-512-NEXT: sub a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 30 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 29 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v3, (a0) # Unknown-size Folded 
Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 28 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 27 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 26 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 25 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 24 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 23 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 22 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 21 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 20 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 19 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 18 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 4 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV32-BITS-512-NEXT: csrr a0, vlenb ; RV32-BITS-512-NEXT: addi a1, a0, -1 ; RV32-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV32-BITS-512-NEXT: vid.v v24 @@ -1768,6 +3836,96 @@ ; RV32-BITS-512-NEXT: vs8r.v v16, (a1) ; RV32-BITS-512-NEXT: vl8re64.v v16, (a0) ; RV32-BITS-512-NEXT: vl8re64.v v8, (a1) +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 5 +; RV32-BITS-512-NEXT: sub a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 
+; RV32-BITS-512-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 30 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 29 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 28 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 27 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 26 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 25 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 24 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 23 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 22 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 21 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 20 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 19 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: li a1, 18 +; RV32-BITS-512-NEXT: mul a0, a0, a1 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-BITS-512-NEXT: csrr a0, vlenb +; RV32-BITS-512-NEXT: slli a1, a0, 4 +; RV32-BITS-512-NEXT: add a0, a1, a0 +; RV32-BITS-512-NEXT: add a0, sp, a0 +; RV32-BITS-512-NEXT: 
addi a0, a0, 64 +; RV32-BITS-512-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV32-BITS-512-NEXT: addi sp, s0, -80 ; RV32-BITS-512-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32-BITS-512-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -1785,10 +3943,100 @@ ; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 80 ; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb -; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 4 +; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 5 ; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0 ; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64 ; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 30 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 29 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 28 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 27 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 26 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 25 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 24 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 23 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 22 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 21 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; 
RV64-BITS-UNKNOWN-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 20 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 19 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 18 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb ; RV64-BITS-UNKNOWN-NEXT: addi a1, a0, -1 ; RV64-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-BITS-UNKNOWN-NEXT: vid.v v24 @@ -1804,6 +4052,96 @@ ; RV64-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1) ; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0) ; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1) +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 5 +; RV64-BITS-UNKNOWN-NEXT: sub a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 30 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 29 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 28 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 27 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 26 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 25 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 24 
+; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 23 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 22 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 21 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 20 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 19 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: li a1, 18 +; RV64-BITS-UNKNOWN-NEXT: mul a0, a0, a1 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb +; RV64-BITS-UNKNOWN-NEXT: slli a1, a0, 4 +; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0 +; RV64-BITS-UNKNOWN-NEXT: add a0, sp, a0 +; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, 64 +; RV64-BITS-UNKNOWN-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -80 ; RV64-BITS-UNKNOWN-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-BITS-UNKNOWN-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -1821,10 +4159,100 @@ ; RV64-BITS-256-NEXT: addi s0, sp, 80 ; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0 ; RV64-BITS-256-NEXT: csrr a0, vlenb -; RV64-BITS-256-NEXT: slli a0, a0, 4 +; RV64-BITS-256-NEXT: slli a0, a0, 5 ; RV64-BITS-256-NEXT: sub sp, sp, a0 ; RV64-BITS-256-NEXT: andi sp, sp, -64 ; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 5 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 30 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 29 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 28 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: 
addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 27 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 26 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 25 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 24 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 23 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 22 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 21 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 20 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 19 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 18 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 4 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV64-BITS-256-NEXT: csrr a0, vlenb ; RV64-BITS-256-NEXT: addi a1, a0, -1 ; RV64-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-BITS-256-NEXT: vid.v v24 @@ -1840,6 +4268,96 @@ ; RV64-BITS-256-NEXT: vs8r.v v16, (a1) ; RV64-BITS-256-NEXT: vl8re64.v v16, (a0) ; RV64-BITS-256-NEXT: vl8re64.v v8, (a1) +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 5 +; RV64-BITS-256-NEXT: sub a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 30 +; RV64-BITS-256-NEXT: mul a0, a0, 
a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 29 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 28 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 27 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 26 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 25 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 24 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 23 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 22 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 21 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 20 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 19 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: li a1, 18 +; RV64-BITS-256-NEXT: mul a0, a0, a1 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-256-NEXT: csrr a0, vlenb +; RV64-BITS-256-NEXT: slli a1, a0, 4 +; RV64-BITS-256-NEXT: add a0, a1, a0 +; RV64-BITS-256-NEXT: add a0, sp, a0 +; RV64-BITS-256-NEXT: addi a0, a0, 64 +; RV64-BITS-256-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV64-BITS-256-NEXT: addi sp, s0, -80 ; RV64-BITS-256-NEXT: ld ra, 72(sp) # 8-byte Folded 
Reload ; RV64-BITS-256-NEXT: ld s0, 64(sp) # 8-byte Folded Reload @@ -1857,10 +4375,100 @@ ; RV64-BITS-512-NEXT: addi s0, sp, 80 ; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0 ; RV64-BITS-512-NEXT: csrr a0, vlenb -; RV64-BITS-512-NEXT: slli a0, a0, 4 +; RV64-BITS-512-NEXT: slli a0, a0, 5 ; RV64-BITS-512-NEXT: sub sp, sp, a0 ; RV64-BITS-512-NEXT: andi sp, sp, -64 ; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 5 +; RV64-BITS-512-NEXT: sub a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 30 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 29 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 28 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 27 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 26 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 25 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 24 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 23 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 22 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 21 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 20 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 19 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; 
RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 18 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 4 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; RV64-BITS-512-NEXT: csrr a0, vlenb ; RV64-BITS-512-NEXT: addi a1, a0, -1 ; RV64-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-BITS-512-NEXT: vid.v v24 @@ -1876,6 +4484,96 @@ ; RV64-BITS-512-NEXT: vs8r.v v16, (a1) ; RV64-BITS-512-NEXT: vl8re64.v v16, (a0) ; RV64-BITS-512-NEXT: vl8re64.v v8, (a1) +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 5 +; RV64-BITS-512-NEXT: sub a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 30 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 29 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 28 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 27 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 26 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 25 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 24 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 23 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 22 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 21 +; 
RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 20 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 19 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: li a1, 18 +; RV64-BITS-512-NEXT: mul a0, a0, a1 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-BITS-512-NEXT: csrr a0, vlenb +; RV64-BITS-512-NEXT: slli a1, a0, 4 +; RV64-BITS-512-NEXT: add a0, a1, a0 +; RV64-BITS-512-NEXT: add a0, sp, a0 +; RV64-BITS-512-NEXT: addi a0, a0, 64 +; RV64-BITS-512-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload ; RV64-BITS-512-NEXT: addi sp, s0, -80 ; RV64-BITS-512-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64-BITS-512-NEXT: ld s0, 64(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.nearbyint.nxv1f16(, , i32) @@ -233,6 +233,52 @@ define @vp_nearbyint_nxv32f16( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded 
Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) @@ -248,6 +294,50 @@ ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv32f16( %va, %m, i32 %evl) ret %v @@ -459,6 +549,52 @@ define @vp_nearbyint_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -474,6 +610,50 @@ ; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f32( %va, %m, i32 %evl) ret %v @@ -641,6 +821,52 @@ define @vp_nearbyint_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi 
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
@@ -656,6 +882,50 @@
 ; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 7 x double> @llvm.vp.nearbyint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
 ret <vscale x 7 x double> %v
@@ -687,6 +957,52 @@
 define <vscale x 8 x double> @vp_nearbyint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
@@ -702,6 +1018,50 @@
 ; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x double> @llvm.vp.nearbyint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x double> %v
@@ -734,6 +1094,64 @@
 define <vscale x 16 x double> @vp_nearbyint_nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv16f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -775,6 +1193,62 @@
 ; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 16 x double> @llvm.vp.nearbyint.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x double> %v
@@ -783,6 +1257,52 @@
 define <vscale x 16 x double> @vp_nearbyint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_nearbyint_nxv16f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
@@ -812,6 +1332,50 @@
 ; CHECK-NEXT: fsflags a0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
--- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll
@@ -1,52 +1,220 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v -target-abi=lp64d | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v -target-abi=lp64d | FileCheck %s

 define <4 x float> @foo(ptr %0) nounwind {
 ; CHECK-LABEL: foo:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -48
-; CHECK-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
-; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
+; CHECK-NEXT: addi sp, sp, -64
+; CHECK-NEXT: sd ra, 56(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s0, 48(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s1, 40(sp) # 8-byte Folded Spill
+; CHECK-NEXT: sd s2, 32(sp) # 8-byte Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 14
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 13
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 12
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 11
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 32
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 32
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: lhu s0, 6(a0)
 ; CHECK-NEXT: lhu s1, 4(a0)
 ; CHECK-NEXT: lhu s2, 0(a0)
 ; CHECK-NEXT: lhu a0, 2(a0)
 ; CHECK-NEXT: fmv.w.x fa0, a0
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fsw fa0, 8(sp)
+; CHECK-NEXT: fsw fa0, 24(sp)
 ; CHECK-NEXT: fmv.w.x fa0, s2
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fsw fa0, 0(sp)
+; CHECK-NEXT: fsw fa0, 16(sp)
 ; CHECK-NEXT: fmv.w.x fa0, s1
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fsw fa0, 12(sp)
+; CHECK-NEXT: fsw fa0, 28(sp)
 ; CHECK-NEXT: fmv.w.x fa0, s0
 ; CHECK-NEXT: call __extendhfsf2@plt
-; CHECK-NEXT: fsw fa0, 4(sp)
-; CHECK-NEXT: addi a0, sp, 8
+; CHECK-NEXT: fsw fa0, 20(sp)
+; CHECK-NEXT: addi a0, sp, 24
 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v9, (a0)
-; CHECK-NEXT: mv a0, sp
+; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vle32.v v8, (a0)
 ; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 1
-; CHECK-NEXT: addi a0, sp, 12
+; CHECK-NEXT: addi a0, sp, 28
 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v9, (a0)
 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 2
-; CHECK-NEXT: addi a0, sp, 4
+; CHECK-NEXT: addi a0, sp, 20
 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
 ; CHECK-NEXT: vle32.v v9, (a0)
 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
 ; CHECK-NEXT: vslideup.vi v8, v9, 3
-; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
-; CHECK-NEXT: ld s2, 16(sp) # 8-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 48
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 32
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 32
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: ld ra, 56(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s0, 48(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s1, 40(sp) # 8-byte Folded Reload
+; CHECK-NEXT: ld s2, 32(sp) # 8-byte Folded Reload
+; CHECK-NEXT: addi sp, sp, 64
 ; CHECK-NEXT: ret
 %2 = load <4 x half>, ptr %0, align 2
 %3 = fpext <4 x half> %2 to <4 x float>
diff --git a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rint-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s

 declare <vscale x 1 x half> @llvm.vp.rint.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -213,6 +213,52 @@
 define <vscale x 32 x half> @vp_rint_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
 ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
@@ -226,6 +272,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vp.rint.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x half> %v
@@ -419,6 +509,52 @@
 define <vscale x 16 x float> @vp_rint_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -432,6 +568,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 16 x float> @llvm.vp.rint.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x float> %v
@@ -585,6 +765,52 @@
 define <vscale x 7 x double> @vp_rint_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv7f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
@@ -598,6 +824,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 7 x double> @llvm.vp.rint.nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 %evl)
 ret <vscale x 7 x double> %v
@@ -627,6 +897,52 @@
 define <vscale x 8 x double> @vp_rint_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1)
@@ -640,6 +956,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x double> @llvm.vp.rint.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x double> %v
@@ -673,9 +1033,68 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: li a2, 18
+; CHECK-NEXT: mul a1, a1, a2
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 4
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 14
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 13
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 12
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 11
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 10
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -717,7 +1136,66 @@
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -728,6 +1206,52 @@
 define <vscale x 16 x double> @vp_rint_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_rint_nxv16f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
@@ -753,6 +1277,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/round-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s

 declare <vscale x 1 x half> @llvm.vp.round.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -233,6 +233,52 @@
 define <vscale x 32 x half> @vp_round_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0)
 ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1)
@@ -248,6 +294,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 32 x half> @llvm.vp.round.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 %evl)
 ret <vscale x 32 x half> %v
@@ -459,6 +549,52 @@
 define <vscale x 16 x float> @vp_round_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vfabs.v v24, v8, v0.t
@@ -474,6 +610,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 16 x float> @llvm.vp.round.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x float> %v
@@ -641,6 +821,52 @@
 define <vscale x 7 x double> @vp_round_nxv7f64(<vscale x 7 x double> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_round_nxv7f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v16, v0
 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0)
 ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1)
@@ -656,6 +882,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv7f64( %va, %m, i32 %evl) ret %v @@ -687,6 +957,52 @@ define @vp_round_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) @@ -702,6 +1018,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) 
# Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.round.nxv8f64( %va, %m, i32 %evl) ret %v @@ -737,9 +1097,68 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 @@ -788,7 +1207,66 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -799,6 +1277,52 @@ define @vp_round_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # 
Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
@@ -828,6 +1352,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.roundeven.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -233,6 +233,52 @@
 define <vscale x 32 x half> @vp_roundeven_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundeven_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+;
CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) @@ -248,6 +294,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv32f16( %va, %m, i32 %evl) ret %v @@ -459,6 +549,52 @@ define @vp_roundeven_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, 
(a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -474,6 +610,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv16f32( %va, %m, i32 %evl) ret %v @@ -641,6 +821,52 @@ define @vp_roundeven_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) @@ -656,6 +882,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv7f64( %va, %m, i32 %evl) ret %v @@ -687,6 +957,52 @@ define @vp_roundeven_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64: ; CHECK: # 
%bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) @@ -702,6 +1018,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; 
CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundeven.nxv8f64( %va, %m, i32 %evl) ret %v @@ -737,9 +1097,68 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 @@ -788,7 +1207,66 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, 
sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -799,6 +1277,52 @@ define @vp_roundeven_nxv16f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv16f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: sub a2, a0, a1 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0) @@ -828,6 +1352,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli 
zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/roundtozero-vp.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 
 declare <vscale x 1 x half> @llvm.vp.roundtozero.nxv1f16(<vscale x 1 x half>, <vscale x 1 x i1>, i32)
@@ -233,6 +233,52 @@
 define <vscale x 32 x half> @vp_roundtozero_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) #
Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI10_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI10_0)(a1) @@ -248,6 +294,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv32f16( %va, %m, i32 %evl) ret %v @@ -459,6 +549,52 @@ define @vp_roundtozero_nxv16f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8, v0.t @@ -474,6 +610,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv16f32( %va, %m, i32 %evl) ret %v @@ -641,6 +821,52 @@ define @vp_roundtozero_nxv7f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: 
add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI28_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI28_0)(a1) @@ -656,6 +882,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv7f64( %va, %m, i32 %evl) ret %v @@ -687,6 +957,52 @@ define @vp_roundtozero_nxv8f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v16, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI30_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI30_0)(a1) @@ -702,6 +1018,50 @@ ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.roundtozero.nxv8f64( %va, %m, i32 %evl) ret %v @@ -737,9 +1097,68 @@ ; CHECK-NEXT: addi sp, sp, -16 ; 
CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 @@ -788,7 +1207,66 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; 
CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -799,6 +1277,52 @@ define <vscale x 16 x double> @vp_roundtozero_nxv16f64_unmasked(<vscale x 16 x double> %va, i32 zeroext %evl) {
 ; CHECK-LABEL: vp_roundtozero_nxv16f64_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: sub a2, a0, a1
 ; CHECK-NEXT: lui a3, %hi(.LCPI33_0)
@@ -828,6 +1352,50 @@
 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O0 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -O0 < %s \
 ; RUN: | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O2 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d -O2 < %s \
 ; RUN: | FileCheck --check-prefix=SPILL-O2 %s
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@@ -12,8 +12,95 @@
 ; SPILL-O0-NEXT: addi sp, sp, -32
 ; SPILL-O0-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
 ; SPILL-O0-NEXT: csrr a1, vlenb
-; SPILL-O0-NEXT: slli a1, a1, 1
+; SPILL-O0-NEXT: li a2, 18
+; SPILL-O0-NEXT: mul a1, a1, a2
 ; SPILL-O0-NEXT: sub sp, sp, a1
+; SPILL-O0-NEXT: csrr a1, vlenb
+; SPILL-O0-NEXT: slli a1, a1, 4
+; SPILL-O0-NEXT: add a1, sp, a1
+; SPILL-O0-NEXT: addi a1, a1, 16
+; SPILL-O0-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a1, vlenb
+; SPILL-O0-NEXT: slli a2, a1, 4
+; SPILL-O0-NEXT: sub a1, a2, a1
+; SPILL-O0-NEXT: add a1, sp, a1
+; SPILL-O0-NEXT: addi a1, a1, 16
+; SPILL-O0-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a1, vlenb
+; SPILL-O0-NEXT: li a2, 14
+; SPILL-O0-NEXT: mul a1, a1, a2
+; SPILL-O0-NEXT: add a1, sp, a1
+; SPILL-O0-NEXT: addi a1, a1, 16
+; SPILL-O0-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a1, vlenb
+; SPILL-O0-NEXT: li a2, 13
+; SPILL-O0-NEXT: mul a1, a1, a2
+; SPILL-O0-NEXT: add a1, sp, a1
+; SPILL-O0-NEXT: addi a1, a1, 16
+; SPILL-O0-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a1, vlenb
+; SPILL-O0-NEXT: li a2, 12
+; SPILL-O0-NEXT: mul a1, a1, a2
+;
SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 11 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 10 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 3 +; SPILL-O0-NEXT: add a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 3 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 3 +; SPILL-O0-NEXT: sub a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 6 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 2 +; SPILL-O0-NEXT: add a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 1 +; SPILL-O0-NEXT: add a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; SPILL-O0-NEXT: vmv1r.v v10, v9 ; SPILL-O0-NEXT: vmv1r.v v9, v8 @@ -41,7 +128,94 @@ ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 
16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; SPILL-O0-NEXT: addi sp, sp, 32 @@ -53,8 +227,95 @@ ; SPILL-O2-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; SPILL-O2-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: li a2, 18 +; SPILL-O2-NEXT: mul a1, a1, a2 ; SPILL-O2-NEXT: sub sp, sp, a1 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 4 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 4 +; SPILL-O2-NEXT: sub a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 14 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 13 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; 
SPILL-O2-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 12 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 11 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 10 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 3 +; SPILL-O2-NEXT: add a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 3 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 3 +; SPILL-O2-NEXT: sub a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 6 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 2 +; SPILL-O2-NEXT: add a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 1 +; SPILL-O2-NEXT: add a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; SPILL-O2-NEXT: mv s0, a0 ; SPILL-O2-NEXT: addi a1, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -76,7 +337,94 @@ ; SPILL-O2-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: vfadd.vv v8, v9, v8 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size 
Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
 ; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
 ; SPILL-O2-NEXT: add sp, sp, a0
 ; SPILL-O2-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
 ; SPILL-O2-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -O0 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O0 < %s \
 ; RUN: | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -O2 < %s \
 ; RUN: | FileCheck --check-prefix=SPILL-O2 %s
 define <vscale x 1 x i32> @spill_lmul_mf2(<vscale x 1 x i32> %va) nounwind {
@@ -9,8 +9,93 @@
 ; SPILL-O0: # %bb.0: # %entry
 ; SPILL-O0-NEXT: addi sp, sp, -16
 ; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: slli a0, a0, 4
 ; SPILL-O0-NEXT: sub sp, sp, a0
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP @@ -18,7 +103,92 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, 
sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -27,15 +197,185 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: slli a0, a0, 4 ; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 
+; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: 
vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -51,8 +391,93 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: slli a0, a0, 4 ; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, 
sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP @@ -60,7 +485,92 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size 
Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -69,15 +579,185 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: slli a0, a0, 4 ; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi 
a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; 
SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -93,8 +773,95 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a0) # 
Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP @@ -102,7 +869,94 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, 
vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -111,15 +965,189 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; 
SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: 
vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -135,8 +1163,96 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: li a1, 20 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: sub sp, sp, a0 +; 
SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP @@ -144,7 +1260,95 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb 
+; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 20 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -153,15 +1357,191 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; 
SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill ; 
SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; 
SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -177,8 +1557,97 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a0, vlenb -; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: li a1, 24 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: sub sp, sp, a0 +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 22 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 21 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 20 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 19 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; 
SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O0-NEXT: #APP @@ -186,7 +1655,96 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 22 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 21 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 20 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 19 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 
+; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 24 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -195,15 +1753,193 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a0, vlenb -; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: li a1, 24 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: sub sp, sp, a0 +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 22 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 21 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 19 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; 
SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: addi a0, sp, 16 ; SPILL-O2-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; SPILL-O2-NEXT: #APP ; SPILL-O2-NEXT: #NO_APP ; SPILL-O2-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 22 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 21 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 19 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, 
(a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 24 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll @@ -9,8 +9,93 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: slli a2, a2, 4 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, 
a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: # implicit-def: $v9 ; SPILL-O0-NEXT: # implicit-def: $v10 @@ -27,7 +112,92 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; 
SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -36,8 +206,95 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 2 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, 
a2, 2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 1 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -52,7 +309,94 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; 
SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -69,8 +413,93 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: slli a2, a2, 4 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded 
Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: # implicit-def: $v9 ; SPILL-O0-NEXT: # implicit-def: $v10 @@ -87,7 +516,92 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; 
SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -96,8 +610,95 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 2 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded 
Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 1 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -112,7 +713,94 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; 
SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -129,8 +817,95 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: li a3, 18 +; SPILL-O0-NEXT: mul a2, a2, a3 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 4 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, 
a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8m2 ; SPILL-O0-NEXT: # implicit-def: $v10m2 ; SPILL-O0-NEXT: # implicit-def: $v12m2 @@ -147,7 +922,94 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; 
SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -156,8 +1018,96 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: li a3, 20 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, 
vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 2 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -175,7 +1125,95 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; 
SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -192,8 +1230,96 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: li a3, 20 +; SPILL-O0-NEXT: mul a2, a2, a3 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 18 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 4 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 
+; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8m4 ; SPILL-O0-NEXT: # implicit-def: $v12m4 ; SPILL-O0-NEXT: # implicit-def: $v16m4 @@ -210,7 +1336,95 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; 
SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 20 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -219,8 +1433,97 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: li a3, 24 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 22 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 21 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 20 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 19 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; 
SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -238,7 +1541,96 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 22 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 21 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 19 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 
+; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 24 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -255,8 +1647,95 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: li a3, 18 +; SPILL-O0-NEXT: mul a2, a2, a3 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 4 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, 
(a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8m2 ; SPILL-O0-NEXT: # implicit-def: $v10m2 ; SPILL-O0-NEXT: # implicit-def: $v16m2 @@ -276,7 +1755,94 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; 
SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -285,9 +1851,97 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: li a3, 22 ; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 20 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 19 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; 
SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -309,8 +1963,96 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 19 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li 
a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: li a1, 6 ; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 22 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll --- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll @@ -1,9 +1,9 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O0 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -mattr=+m,+d -O0 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O0 %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -mattr=+m,+d -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2 %s -; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-max=128 -O2 < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -mattr=+m,+d -riscv-v-vector-bits-max=128 -O2 < %s \ ; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s @@ -15,8 +15,95 @@ ; SPILL-O0-NEXT: addi sp, sp, -48 ; SPILL-O0-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: csrr a1, vlenb -; SPILL-O0-NEXT: slli a1, a1, 1 +; SPILL-O0-NEXT: li a2, 18 +; SPILL-O0-NEXT: mul a1, a1, a2 ; SPILL-O0-NEXT: sub sp, sp, a1 +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 4 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 4 +; SPILL-O0-NEXT: sub a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 14 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, 
vlenb +; SPILL-O0-NEXT: li a2, 13 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 12 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 11 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 10 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 3 +; SPILL-O0-NEXT: add a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 3 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 3 +; SPILL-O0-NEXT: sub a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: li a2, 6 +; SPILL-O0-NEXT: mul a1, a1, a2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 2 +; SPILL-O0-NEXT: add a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 2 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a2, a1, 1 +; SPILL-O0-NEXT: add a1, a2, a1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a1, vlenb +; SPILL-O0-NEXT: slli a1, a1, 1 +; SPILL-O0-NEXT: add a1, sp, a1 +; SPILL-O0-NEXT: addi a1, a1, 32 +; SPILL-O0-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; SPILL-O0-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; SPILL-O0-NEXT: vmv1r.v v10, v9 ; SPILL-O0-NEXT: vmv1r.v v9, v8 @@ -44,7 +131,94 @@ ; SPILL-O0-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; SPILL-O0-NEXT: vfadd.vv v8, v9, v10 ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, 
a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 32 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; SPILL-O0-NEXT: addi sp, sp, 48 @@ -56,8 +230,95 @@ ; SPILL-O2-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; SPILL-O2-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; SPILL-O2-NEXT: csrr a1, vlenb -; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: li a2, 18 +; SPILL-O2-NEXT: mul a1, a1, a2 ; SPILL-O2-NEXT: sub sp, sp, a1 +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 4 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 4 +; SPILL-O2-NEXT: sub a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 14 +; SPILL-O2-NEXT: mul a1, a1, 
a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 13 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 12 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 11 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 10 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 3 +; SPILL-O2-NEXT: add a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 3 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 3 +; SPILL-O2-NEXT: sub a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: li a2, 6 +; SPILL-O2-NEXT: mul a1, a1, a2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 2 +; SPILL-O2-NEXT: add a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 2 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a2, a1, 1 +; SPILL-O2-NEXT: add a1, a2, a1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a1, vlenb +; SPILL-O2-NEXT: slli a1, a1, 1 +; SPILL-O2-NEXT: add a1, sp, a1 +; SPILL-O2-NEXT: addi a1, a1, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; SPILL-O2-NEXT: mv s0, a0 ; SPILL-O2-NEXT: addi a1, sp, 16 ; SPILL-O2-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill @@ -79,7 +340,94 @@ ; SPILL-O2-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: vfadd.vv v8, v9, v8 ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; 
SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; SPILL-O2-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
@@ -91,7 +439,37 @@
; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32
; SPILL-O2-VLEN128-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: addi sp, sp, -32
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, -288
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 272
+; SPILL-O2-VLEN128-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 256
+; SPILL-O2-VLEN128-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 240
+; SPILL-O2-VLEN128-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 224
+; SPILL-O2-VLEN128-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 208
+; SPILL-O2-VLEN128-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 192
+; SPILL-O2-VLEN128-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 176
+; SPILL-O2-VLEN128-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 160
+; SPILL-O2-VLEN128-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 144
+; SPILL-O2-VLEN128-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 128
+; SPILL-O2-VLEN128-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 112
+; SPILL-O2-VLEN128-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 96
+; SPILL-O2-VLEN128-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 80
+; SPILL-O2-VLEN128-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 64
+; SPILL-O2-VLEN128-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VLEN128-NEXT: addi a1, sp, 48
+; SPILL-O2-VLEN128-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; SPILL-O2-VLEN128-NEXT: mv s0, a0
; SPILL-O2-VLEN128-NEXT: addi a1, sp, 16
; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
@@ -108,7 +486,37 @@
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
; SPILL-O2-VLEN128-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; SPILL-O2-VLEN128-NEXT: vfadd.vv v8, v9, v8
-; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 272
+; SPILL-O2-VLEN128-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 256
+; SPILL-O2-VLEN128-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 240
+; SPILL-O2-VLEN128-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 224
+; SPILL-O2-VLEN128-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 208
+; SPILL-O2-VLEN128-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 192
+; SPILL-O2-VLEN128-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 176
+; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 160
+; SPILL-O2-VLEN128-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 144
+; SPILL-O2-VLEN128-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 128
+; SPILL-O2-VLEN128-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 112
+; SPILL-O2-VLEN128-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 96
+; SPILL-O2-VLEN128-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 80
+; SPILL-O2-VLEN128-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 64
+; SPILL-O2-VLEN128-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi a0, sp, 48
+; SPILL-O2-VLEN128-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VLEN128-NEXT: addi sp, sp, 288
; SPILL-O2-VLEN128-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -O0 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O0 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O0 %s
-; RUN: llc -mtriple=riscv64 -mattr=+v -O2 < %s \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2 %s

define <vscale x 1 x i64> @spill_lmul_1(<vscale x 1 x i64> %va) nounwind {
@@ -9,8 +9,93 @@
; SPILL-O0: # %bb.0: # %entry
; SPILL-O0-NEXT: addi sp, sp, -16
; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: slli a0, a0, 4
; SPILL-O0-NEXT: sub sp, sp, a0
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 1
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: #APP
@@ -18,7 +103,92 @@
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 1
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: addi sp, sp, 16
; SPILL-O0-NEXT: ret
@@ -27,15 +197,185 @@
; SPILL-O2: # %bb.0: # %entry
; SPILL-O2-NEXT: addi sp, sp, -16
; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: slli a0, a0, 4
; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
@@ -51,8 +391,95 @@
; SPILL-O0: # %bb.0: # %entry
; SPILL-O0-NEXT: addi sp, sp, -16
; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: li a1, 18
+; SPILL-O0-NEXT: mul a0, a0, a1
; SPILL-O0-NEXT: sub sp, sp, a0
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 1
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: #APP
@@ -60,7 +487,94 @@
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 1
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 18
+; SPILL-O0-NEXT: mul a0, a0, a1
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: addi sp, sp, 16
; SPILL-O0-NEXT: ret
@@ -69,15 +583,189 @@
; SPILL-O2: # %bb.0: # %entry
; SPILL-O2-NEXT: addi sp, sp, -16
; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 1
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
@@ -93,8 +781,96 @@
; SPILL-O0: # %bb.0: # %entry
; SPILL-O0-NEXT: addi sp, sp, -16
; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: li a1, 20
+; SPILL-O0-NEXT: mul a0, a0, a1
; SPILL-O0-NEXT: sub sp, sp, a0
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 18
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: #APP
@@ -102,7 +878,95 @@
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 18
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 20
+; SPILL-O0-NEXT: mul a0, a0, a1
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: addi sp, sp, 16
; SPILL-O0-NEXT: ret
@@ -111,15 +975,191 @@
; SPILL-O2: # %bb.0: # %entry
; SPILL-O2-NEXT: addi sp, sp, -16
; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: li a1, 20
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 2
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 20
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
@@ -135,8 +1175,97 @@
; SPILL-O0: # %bb.0: # %entry
; SPILL-O0-NEXT: addi sp, sp, -16
; SPILL-O0-NEXT: csrr a0, vlenb
-; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: li a1, 24
+; SPILL-O0-NEXT: mul a0, a0, a1
; SPILL-O0-NEXT: sub sp, sp, a0
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 22
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 21
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 20
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 19
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 18
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O0-NEXT: #APP
@@ -144,7 +1273,96 @@
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 22
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 21
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 20
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 19
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 18
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 24
+; SPILL-O0-NEXT: mul a0, a0, a1
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: addi sp, sp, 16
; SPILL-O0-NEXT: ret
@@ -153,15 +1371,193 @@
; SPILL-O2: # %bb.0: # %entry
; SPILL-O2-NEXT: addi sp, sp, -16
; SPILL-O2-NEXT: csrr a0, vlenb
-; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: li a1, 24
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: sub sp, sp, a0
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 22
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 21
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 20
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 19
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
; SPILL-O2-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 22
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 21
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 20
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 19
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 18
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 4
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 4
+; SPILL-O2-NEXT: sub a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 14
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 13
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 12
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 11
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 10
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a1, a0, 3
+; SPILL-O2-NEXT: add a0, a1, a0
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add a0, sp, a0
+; SPILL-O2-NEXT: addi a0, a0, 16
+; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: li a1, 24
+; SPILL-O2-NEXT: mul a0, a0, a1
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -9,8 +9,93 @@
; SPILL-O0: # %bb.0: # %entry
; SPILL-O0-NEXT: addi sp, sp, -16
; SPILL-O0-NEXT: csrr a2, vlenb
-; SPILL-O0-NEXT: slli a2, a2, 1
+; SPILL-O0-NEXT: slli a2, a2, 4
; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a3, a2, 4
+; SPILL-O0-NEXT: sub a2, a3, a2
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: li a3, 14
+; SPILL-O0-NEXT: mul a2, a2, a3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: li a3, 13
+; SPILL-O0-NEXT: mul a2, a2, a3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: li a3, 12
+; SPILL-O0-NEXT: mul a2, a2, a3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: li a3, 11
+; SPILL-O0-NEXT: mul a2, a2, a3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: li a3, 10
+; SPILL-O0-NEXT: mul a2, a2, a3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a3, a2, 3
+; SPILL-O0-NEXT: add a2, a3, a2
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a3, a2, 3
+; SPILL-O0-NEXT: sub a2, a3, a2
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: li a3, 6
+; SPILL-O0-NEXT: mul a2, a2, a3
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a3, a2, 2
+; SPILL-O0-NEXT: add a2, a3, a2
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 2
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a3, a2, 1
+; SPILL-O0-NEXT: add a2, a3, a2
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 1
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: add a2, sp, a2
+; SPILL-O0-NEXT: addi a2, a2, 16
+; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; SPILL-O0-NEXT: # implicit-def: $v8
; SPILL-O0-NEXT: # implicit-def: $v9
; SPILL-O0-NEXT: # implicit-def: $v10
@@ -27,7 +112,92 @@
; SPILL-O0-NEXT: addi a0, sp, 16
; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 4
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 14
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 13
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 12
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 11
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 10
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 3
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 3
+; SPILL-O0-NEXT: sub a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: li a1, 6
+; SPILL-O0-NEXT: mul a0, a0, a1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 2
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a1, a0, 1
+; SPILL-O0-NEXT: add a0, a1, a0
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add a0, sp, a0
+; SPILL-O0-NEXT: addi a0, a0, 16
+; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 4
; SPILL-O0-NEXT: add sp, sp, a0
; SPILL-O0-NEXT: addi sp, sp, 16
; SPILL-O0-NEXT: ret
@@ -36,8 +206,95 @@
; SPILL-O2: # %bb.0: # %entry
; SPILL-O2-NEXT: addi sp, sp, -16
; SPILL-O2-NEXT: csrr a2, vlenb
-; SPILL-O2-NEXT: slli a2, a2, 1
+; SPILL-O2-NEXT: li a3, 18
+; SPILL-O2-NEXT: mul a2, a2, a3
; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 4
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a3, a2, 4
+; SPILL-O2-NEXT: sub a2, a3, a2
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: li a3, 14
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: li a3, 13
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: li a3, 12
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: li a3, 11
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: li a3, 10
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: add a2, sp, a2
+; SPILL-O2-NEXT: addi a2, a2, 16
+; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a3, a2, 3
+; SPILL-O2-NEXT: add a2, a3, a2
+; SPILL-O2-NEXT: add a2,
sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 2 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 1 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -52,7 +309,94 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; 
SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -69,8 +413,93 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: slli a2, a2, 4 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr 
a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8 ; SPILL-O0-NEXT: # implicit-def: $v9 ; SPILL-O0-NEXT: # implicit-def: $v10 @@ -87,7 +516,92 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; 
SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -96,8 +610,95 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli 
a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 2 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 1 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 1 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -112,7 +713,94 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; 
SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 1 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -129,8 +817,95 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: li a3, 18 +; SPILL-O0-NEXT: mul a2, a2, a3 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 4 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: 
vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8m2 ; SPILL-O0-NEXT: # implicit-def: $v10m2 ; SPILL-O0-NEXT: # implicit-def: $v12m2 @@ -147,7 +922,94 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; 
SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -156,8 +1018,96 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: li a3, 20 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: 
mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 2 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -175,7 +1125,95 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; 
SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 6 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 2 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 2 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -192,8 +1230,96 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: li a3, 20 +; SPILL-O0-NEXT: mul a2, a2, a3 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 18 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 4 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi 
a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8m4 ; SPILL-O0-NEXT: # implicit-def: $v12m4 ; SPILL-O0-NEXT: # implicit-def: $v16m4 @@ -210,7 +1336,95 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; 
SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 20 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -219,8 +1433,97 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: li a3, 24 +; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 22 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 21 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 20 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 19 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # 
Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; SPILL-O2-NEXT: vlseg2e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -238,7 +1541,96 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 22 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 21 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 19 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # 
Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 24 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret @@ -255,8 +1647,95 @@ ; SPILL-O0: # %bb.0: # %entry ; SPILL-O0-NEXT: addi sp, sp, -16 ; SPILL-O0-NEXT: csrr a2, vlenb -; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: li a3, 18 +; SPILL-O0-NEXT: mul a2, a2, a3 ; SPILL-O0-NEXT: sub sp, sp, a2 +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 4 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 4 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 14 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 13 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, 
vlenb +; SPILL-O0-NEXT: li a3, 12 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 11 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 10 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 3 +; SPILL-O0-NEXT: sub a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: li a3, 6 +; SPILL-O0-NEXT: mul a2, a2, a3 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 2 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a3, a2, 1 +; SPILL-O0-NEXT: add a2, a3, a2 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O0-NEXT: csrr a2, vlenb +; SPILL-O0-NEXT: slli a2, a2, 1 +; SPILL-O0-NEXT: add a2, sp, a2 +; SPILL-O0-NEXT: addi a2, a2, 16 +; SPILL-O0-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O0-NEXT: # implicit-def: $v8m2 ; SPILL-O0-NEXT: # implicit-def: $v10m2 ; SPILL-O0-NEXT: # implicit-def: $v16m2 @@ -276,7 +1755,94 @@ ; SPILL-O0-NEXT: addi a0, sp, 16 ; SPILL-O0-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload ; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 4 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 4 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 14 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 13 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 12 +; 
SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 11 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 10 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 3 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 3 +; SPILL-O0-NEXT: sub a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 6 +; SPILL-O0-NEXT: mul a0, a0, a1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 2 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a0, a0, 2 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: slli a1, a0, 1 +; SPILL-O0-NEXT: add a0, a1, a0 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb ; SPILL-O0-NEXT: slli a0, a0, 1 +; SPILL-O0-NEXT: add a0, sp, a0 +; SPILL-O0-NEXT: addi a0, a0, 16 +; SPILL-O0-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O0-NEXT: csrr a0, vlenb +; SPILL-O0-NEXT: li a1, 18 +; SPILL-O0-NEXT: mul a0, a0, a1 ; SPILL-O0-NEXT: add sp, sp, a0 ; SPILL-O0-NEXT: addi sp, sp, 16 ; SPILL-O0-NEXT: ret @@ -285,9 +1851,97 @@ ; SPILL-O2: # %bb.0: # %entry ; SPILL-O2-NEXT: addi sp, sp, -16 ; SPILL-O2-NEXT: csrr a2, vlenb -; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: li a3, 22 ; SPILL-O2-NEXT: mul a2, a2, a3 ; SPILL-O2-NEXT: sub sp, sp, a2 +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 20 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 19 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 18 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, 
sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 4 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 4 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 14 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 13 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 12 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 11 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 10 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: add a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a2, a2, 3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: slli a3, a2, 3 +; SPILL-O2-NEXT: sub a2, a3, a2 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; SPILL-O2-NEXT: csrr a2, vlenb +; SPILL-O2-NEXT: li a3, 6 +; SPILL-O2-NEXT: mul a2, a2, a3 +; SPILL-O2-NEXT: add a2, sp, a2 +; SPILL-O2-NEXT: addi a2, a2, 16 +; SPILL-O2-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; SPILL-O2-NEXT: vlseg3e32.v v8, (a0) ; SPILL-O2-NEXT: addi a0, sp, 16 @@ -309,8 +1963,96 @@ ; SPILL-O2-NEXT: add a0, a0, a1 ; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload ; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 20 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 19 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 18 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add 
a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 4 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 4 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 14 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 13 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 12 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 11 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 10 +; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: add a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a0, a0, 3 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: slli a1, a0, 3 +; SPILL-O2-NEXT: sub a0, a1, a0 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb ; SPILL-O2-NEXT: li a1, 6 ; SPILL-O2-NEXT: mul a0, a0, a1 +; SPILL-O2-NEXT: add a0, sp, a0 +; SPILL-O2-NEXT: addi a0, a0, 16 +; SPILL-O2-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; SPILL-O2-NEXT: csrr a0, vlenb +; SPILL-O2-NEXT: li a1, 22 +; SPILL-O2-NEXT: mul a0, a0, a1 ; SPILL-O2-NEXT: add sp, sp, a0 ; SPILL-O2-NEXT: addi sp, sp, 16 ; SPILL-O2-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll --- a/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-args-by-mem.ll @@ -1,17 +1,195 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v < %s 2>&1 | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s 2>&1 | FileCheck %s define @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %w, %x, %y, %z) { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: -; CHECK-NEXT: ld a0, 0(sp) -; CHECK-NEXT: ld a1, 8(sp) +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 
0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: ld a0, 16(a0) +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: ld a1, 24(a1) ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vl8re32.v v0, (a1) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: vadd.vv v16, v16, v0 ; CHECK-NEXT: vadd.vv v8, v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %s0 = add %w, %y %s1 = add %x, %z diff --git a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/select-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-fp.ll @@ -147,11 +147,55 @@ define @select_nxv32f16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: 
slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v24, a0 ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -160,12 +204,56 @@ define @selectcc_nxv32f16(half %a, half %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: feq.h a0, fa0, fa1 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v24, a0 ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr 
a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %cmp = fcmp oeq half %a, %b
   %v = select i1 %cmp, <vscale x 32 x half> %c, <vscale x 32 x half> %d
@@ -287,11 +375,33 @@
 define <vscale x 16 x float> @select_nxv16f32(i1 zeroext %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b) {
 ; CHECK-LABEL: select_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT: vmv.v.x v24, a0
 ; CHECK-NEXT: vmsne.vi v0, v24, 0
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = select i1 %c, <vscale x 16 x float> %a, <vscale x 16 x float> %b
   ret <vscale x 16 x float> %v
@@ -300,12 +410,34 @@
 define <vscale x 16 x float> @selectcc_nxv16f32(float %a, float %b, <vscale x 16 x float> %c, <vscale x 16 x float> %d) {
 ; CHECK-LABEL: selectcc_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: feq.s a0, fa0, fa1
 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma
 ; CHECK-NEXT: vmv.v.x v24, a0
 ; CHECK-NEXT: vmsne.vi v0, v24, 0
 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %cmp = fcmp oeq float %a, %b
   %v = select i1 %cmp, <vscale x 16 x float> %c, <vscale x 16 x float> %d
@@ -400,8 +532,8 @@
 ; CHECK-LABEL: select_nxv8f64:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vmsne.vi v0, v24, 0
+; CHECK-NEXT: vmv.v.x v0, a0
+; CHECK-NEXT: vmsne.vi v0, v0, 0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT: ret
@@ -414,8 +546,8 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: feq.d a0, fa0, fa1
 ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v24, a0
-; CHECK-NEXT: vmsne.vi v0, v24, 0
+; CHECK-NEXT: vmv.v.x v0, a0
+; CHECK-NEXT: vmsne.vi v0, v0, 0
 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
 ; CHECK-NEXT: ret
diff --git
a/llvm/test/CodeGen/RISCV/rvv/select-int.ll b/llvm/test/CodeGen/RISCV/rvv/select-int.ll --- a/llvm/test/CodeGen/RISCV/rvv/select-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/select-int.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s \ ; RUN: | FileCheck %s --check-prefixes=CHECK,RV64 define @select_nxv1i1(i1 zeroext %c, %a, %b) { @@ -386,10 +386,100 @@ define @select_nxv64i8(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv64i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a0 ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -398,12 +488,102 @@ define @selectcc_nxv64i8(i8 signext %a, i8 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv64i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v24, a0 ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli 
a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %cmp = icmp ne i8 %a, %b %v = select i1 %cmp, %c, %d @@ -558,11 +738,55 @@ define @select_nxv32i16(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vmv.v.x v24, a0 ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -571,6 +795,29 @@ define @selectcc_nxv32i16(i16 signext %a, i16 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, 
a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma @@ -578,6 +825,27 @@ ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %cmp = icmp ne i16 %a, %b %v = select i1 %cmp, %c, %d @@ -703,11 +971,33 @@ define @select_nxv16i32(i1 zeroext %c, %a, %b) { ; CHECK-LABEL: select_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.x v24, a0 ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = select i1 %c, %a, %b ret %v @@ -716,6 +1006,18 @@ define @selectcc_nxv16i32(i32 signext %a, i32 signext %b, %c, %d) { ; CHECK-LABEL: selectcc_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, 
(a2) # Unknown-size Folded Spill ; CHECK-NEXT: xor a0, a0, a1 ; CHECK-NEXT: snez a0, a0 ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma @@ -723,6 +1025,16 @@ ; CHECK-NEXT: vmsne.vi v0, v24, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %cmp = icmp ne i32 %a, %b %v = select i1 %cmp, %c, %d @@ -859,8 +1171,8 @@ ; CHECK-LABEL: select_nxv8i64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.x v24, a0 -; CHECK-NEXT: vmsne.vi v0, v24, 0 +; CHECK-NEXT: vmv.v.x v0, a0 +; CHECK-NEXT: vmsne.vi v0, v0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 ; CHECK-NEXT: ret @@ -876,8 +1188,8 @@ ; RV32-NEXT: or a0, a0, a1 ; RV32-NEXT: snez a0, a0 ; RV32-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.x v24, a0 -; RV32-NEXT: vmsne.vi v0, v24, 0 +; RV32-NEXT: vmv.v.x v0, a0 +; RV32-NEXT: vmsne.vi v0, v0, 0 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: ret @@ -887,8 +1199,8 @@ ; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: snez a0, a0 ; RV64-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; RV64-NEXT: vmv.v.x v24, a0 -; RV64-NEXT: vmsne.vi v0, v24, 0 +; RV64-NEXT: vmv.v.x v0, a0 +; RV64-NEXT: vmsne.vi v0, v0, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1086,9 +1086,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 30 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 29 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 28 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 27 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; 
CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -1131,7 +1220,96 @@ ; CHECK-NEXT: vslideup.vx v16, v1, a1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: 
vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1679,9 +1857,23 @@ define @fcmp_oeq_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_oeq_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"oeq", %m, i32 %evl) ret %v @@ -1716,9 +1908,23 @@ define @fcmp_ogt_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ogt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1753,9 +1959,23 @@
 define <vscale x 8 x i1> @fcmp_oge_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_oge_vv_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"oge", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1790,9 +2010,23 @@
 define <vscale x 8 x i1> @fcmp_olt_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_olt_vv_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"olt", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1827,9 +2061,23 @@
 define <vscale x 8 x i1> @fcmp_ole_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ole_vv_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ole", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1864,10 +2112,32 @@
 define <vscale x 8 x i1> @fcmp_one_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_one_vv_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t
 ; CHECK-NEXT: vmor.mm v0, v25, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"one", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1904,10 +2174,24 @@
 define <vscale x 8 x i1> @fcmp_ord_vv_nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmfeq.vv v24, v16, v16, v0.t
 ; CHECK-NEXT: vmfeq.vv v16, v8, v8, v0.t
 ; CHECK-NEXT: vmand.mm v0, v16, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 8 x i1> @llvm.vp.fcmp.nxv8f64(<vscale x 8 x double> %va, <vscale x 8 x double> %vb, metadata !"ord", <vscale x 8 x i1> %m, i32 %evl)
   ret <vscale x 8 x i1> %v
@@ -1916,12 +2200,26 @@
 define <vscale x 8 x i1> @fcmp_ord_vf_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vf_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vfmv.v.f v16, fa0
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t
 ; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t
 ; CHECK-NEXT: vmand.mm v0, v24, v8
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %elt.head = insertelement <vscale x 8 x double> poison, double %b, i32 0
   %vb = shufflevector <vscale x 8 x double> %elt.head, <vscale x 8 x double> poison, <vscale x 8 x i32> zeroinitializer
@@ -1932,12 +2230,26 @@
 define <vscale x 8 x i1> @fcmp_ord_vf_swap_nxv8f64(<vscale x 8 x double> %va, double %b, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: fcmp_ord_vf_swap_nxv8f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
;
CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v24, v8, v8, v0.t ; CHECK-NEXT: vmfeq.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vmand.mm v0, v8, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1948,10 +2260,32 @@ define @fcmp_ueq_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"ueq", %m, i32 %evl) ret %v @@ -1988,9 +2322,23 @@ define @fcmp_ugt_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"ugt", %m, i32 %evl) ret %v @@ -2025,9 +2373,23 @@ define @fcmp_uge_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"uge", %m, i32 %evl) ret %v @@ -2062,9 +2424,23 @@ define @fcmp_ult_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"ult", %m, i32 %evl) ret %v @@ -2099,9 +2475,23 @@ define @fcmp_ule_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"ule", %m, i32 %evl) ret %v @@ -2136,9 +2526,23 @@ define @fcmp_une_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_une_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"une", %m, i32 %evl) ret %v @@ -2173,10 +2577,24 @@ define @fcmp_uno_vv_nxv8f64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: 
addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v24, v16, v16, v0.t ; CHECK-NEXT: vmfne.vv v16, v8, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fcmp.nxv8f64( %va, %vb, metadata !"uno", %m, i32 %evl) ret %v @@ -2185,12 +2603,26 @@ define @fcmp_uno_vf_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v24, v8 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2201,12 +2633,26 @@ define @fcmp_uno_vf_swap_nxv8f64( %va, double %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: fcmp_uno_vf_swap_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmfne.vv v24, v8, v8, v0.t ; CHECK-NEXT: vmfne.vf v8, v16, fa0, v0.t ; CHECK-NEXT: vmor.mm v0, v8, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, double %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2222,10 +2668,70 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: li a3, 58 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x3a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 58 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 57 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; 
CHECK-NEXT: li a3, 56 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 55 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 54 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 53 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 52 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 51 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 50 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 49 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 5 @@ -2369,8 +2875,68 @@ ; CHECK-NEXT: vslideup.vx v18, v16, a0 ; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 57 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 56 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 55 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 54 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 53 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 51 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, 
sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 49
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 48
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 58
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp.ll
@@ -1958,10 +1958,24 @@ define @fcmp_one_vv_nxv8f64( %va, %vb) {
; CHECK-LABEL: fcmp_one_vv_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmflt.vv v24, v8, v16
-; CHECK-NEXT: vmflt.vv v25, v16, v8
-; CHECK-NEXT: vmor.mm v0, v25, v24
+; CHECK-NEXT: vmflt.vv v0, v8, v16
+; CHECK-NEXT: vmflt.vv v24, v16, v8
+; CHECK-NEXT: vmor.mm v0, v24, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%vc = fcmp one %va, %vb
ret %vc
@@ -2021,9 +2035,9 @@
; CHECK-LABEL: fcmp_ord_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v24, v16, v16
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: vmfeq.vv v16, v8, v8
-; CHECK-NEXT: vmand.mm v0, v16, v24
+; CHECK-NEXT: vmand.mm v0, v16, v0
; CHECK-NEXT: ret
%vc = fcmp ord %va, %vb
ret %vc
@@ -2034,9 +2048,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vmfeq.vf v24, v16, fa0
+; CHECK-NEXT: vmfeq.vf v0, v16, fa0
; CHECK-NEXT: vmfeq.vv v16, v8, v8
-; CHECK-NEXT: vmand.mm v0, v16, v24
+; CHECK-NEXT: vmand.mm v0, v16, v0
; CHECK-NEXT: ret
%head = insertelement poison, double %b, i32 0
%splat = shufflevector %head, poison, zeroinitializer
@@ -2049,9 +2063,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vmfeq.vf v24, v16, fa0
+; CHECK-NEXT: vmfeq.vf v0, v16, fa0
; CHECK-NEXT: vmfeq.vv v16, v8, v8
-; CHECK-NEXT: vmand.mm v0, v24, v16
+; CHECK-NEXT: vmand.mm v0, v0, v16
; CHECK-NEXT: ret
%head = insertelement poison, double %b, i32 0
%splat = shufflevector %head, poison, zeroinitializer
@@ -2063,9 +2077,9 @@
; CHECK-LABEL: fcmp_ord_vv_nxv8f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmfeq.vv v24, v16, v16
+; CHECK-NEXT: vmfeq.vv v0, v16, v16
; CHECK-NEXT: vmfeq.vv v16, v8, v8
-; CHECK-NEXT: vmand.mm v0, v16, v24
+; CHECK-NEXT: vmand.mm v0, v16, v0
; CHECK-NEXT: ret
%vc = fcmp ord %va, %vb
ret %vc
@@ -2076,9 +2090,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vmfeq.vf v24, v16, fa0
+; CHECK-NEXT: vmfeq.vf v0, v16, fa0
; CHECK-NEXT: vmfeq.vv v16, v8, v8
-; CHECK-NEXT: vmand.mm v0, v16, v24
+; CHECK-NEXT: vmand.mm v0, v16, v0
; CHECK-NEXT: ret
%head = insertelement poison, double %b, i32 0
%splat = shufflevector %head, poison, zeroinitializer
@@ -2089,10 +2103,24 @@ define @fcmp_ueq_vv_nxv8f64( %va, %vb) {
; CHECK-LABEL: fcmp_ueq_vv_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmflt.vv v24, v8, v16
-; CHECK-NEXT: vmflt.vv v25, v16, v8
-; CHECK-NEXT: vmnor.mm v0, v25, v24
+; CHECK-NEXT: vmflt.vv v0, v8, v16
+; CHECK-NEXT: vmflt.vv v24, v16, v8
+; CHECK-NEXT: vmnor.mm v0, v24, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%vc = fcmp ueq %va, %vb
ret %vc
@@ -2152,8 +2180,8 @@
; CHECK-LABEL: fcmp_ugt_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmfle.vv v24, v8, v16
-; CHECK-NEXT: vmnot.m v0, v24
+; CHECK-NEXT: vmfle.vv v0, v8, v16
+; CHECK-NEXT: vmnot.m v0, v0
; CHECK-NEXT: ret
%vc = fcmp ugt %va, %vb
ret %vc
@@ -2211,8 +2239,8 @@
; CHECK-LABEL: fcmp_uge_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmflt.vv v24, v8, v16
-; CHECK-NEXT: vmnot.m v0, v24
+; CHECK-NEXT: vmflt.vv v0, v8, v16
+; CHECK-NEXT: vmnot.m v0, v0
; CHECK-NEXT: ret
%vc = fcmp uge %va, %vb
ret %vc
@@ -2270,8 +2298,8 @@
; CHECK-LABEL: fcmp_ult_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmfle.vv v24, v16, v8
-; CHECK-NEXT: vmnot.m v0, v24
+; CHECK-NEXT: vmfle.vv v0, v16, v8
+; CHECK-NEXT: vmnot.m v0, v0
; CHECK-NEXT: ret
%vc = fcmp ult %va, %vb
ret %vc
@@ -2329,8 +2357,8 @@
; CHECK-LABEL: fcmp_ule_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmflt.vv v24, v16, v8
-; CHECK-NEXT: vmnot.m v0, v24
+; CHECK-NEXT: vmflt.vv v0, v16, v8
+; CHECK-NEXT: vmnot.m v0, v0
; CHECK-NEXT: ret
%vc = fcmp ule %va, %vb
ret %vc
@@ -2444,9 +2472,9 @@
; CHECK-LABEL: fcmp_uno_vv_nxv8f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmfne.vv v24, v16, v16
+; CHECK-NEXT: vmfne.vv v0, v16, v16
; CHECK-NEXT: vmfne.vv v16, v8, v8
-; CHECK-NEXT: vmor.mm v0, v16, v24
+; CHECK-NEXT: vmor.mm v0, v16, v0
; CHECK-NEXT: ret
%vc = fcmp uno %va, %vb
ret %vc
@@ -2457,9 +2485,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vmfne.vf v24, v16, fa0
+; CHECK-NEXT: vmfne.vf v0, v16, fa0
; CHECK-NEXT: vmfne.vv v16, v8, v8
-; CHECK-NEXT: vmor.mm v0, v16, v24
+; CHECK-NEXT: vmor.mm v0, v16, v0
; CHECK-NEXT: ret
%head = insertelement poison, double %b, i32 0
%splat = shufflevector %head, poison, zeroinitializer
@@ -2472,9 +2500,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vmfne.vf v24, v16, fa0
+; CHECK-NEXT: vmfne.vf v0, v16, fa0
; CHECK-NEXT: vmfne.vv v16, v8, v8
-; CHECK-NEXT: vmor.mm v0, v24, v16
+; CHECK-NEXT: vmor.mm v0, v0, v16
; CHECK-NEXT: ret
%head = insertelement poison, double %b, i32 0
%splat = shufflevector %head, poison, zeroinitializer
@@ -2486,9 +2514,9 @@
; CHECK-LABEL: fcmp_uno_vv_nxv8f64_nonans:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmfne.vv v24, v16, v16
+; CHECK-NEXT: vmfne.vv v0, v16, v16
; CHECK-NEXT: vmfne.vv v16, v8, v8
-; CHECK-NEXT: vmor.mm v0, v16, v24
+; CHECK-NEXT: vmor.mm v0, v16, v0
; CHECK-NEXT: ret
%vc = fcmp uno %va, %vb
ret %vc
@@ -2499,9 +2527,9 @@
; CHECK: # %bb.0:
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vfmv.v.f v16, fa0
-; CHECK-NEXT: vmfne.vf v24, v16, fa0
+; CHECK-NEXT: vmfne.vf v0, v16, fa0
; CHECK-NEXT: vmfne.vv v16, v8, v8
-; CHECK-NEXT: vmor.mm v0, v16, v24
+; CHECK-NEXT: vmor.mm v0, v16, v0
; CHECK-NEXT: ret
%head = insertelement poison, double %b, i32 0
%splat = shufflevector %head, poison, zeroinitializer
@@ -2514,6 +2542,14 @@ define @fcmp_oeq_vf_nx16f64( %va) {
; RV32-LABEL: fcmp_oeq_vf_nx16f64:
; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
; RV32-NEXT: fcvt.d.w fa5, zero
; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV32-NEXT: vmfeq.vf v24, v16, fa5
@@ -2523,10 +2559,24 @@
; RV32-NEXT: add a1, a0, a0
; RV32-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; RV32-NEXT: vslideup.vx v0, v24, a0
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: fcmp_oeq_vf_nx16f64:
; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: sub sp, sp, a0
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
; RV64-NEXT: fmv.d.x fa5, zero
; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; RV64-NEXT: vmfeq.vf v24, v16, fa5
@@ -2536,6 +2586,12 @@
; RV64-NEXT: add a1, a0, a0
; RV64-NEXT: vsetvli zero, a1, e8, mf4, tu, ma
; RV64-NEXT: vslideup.vx v0, v24, a0
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: ret
%vc = fcmp oeq %va, zeroinitializer
ret %vc
diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll
@@ -1161,9 +1161,69 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a4, 26
+; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 25 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 24 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 23 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 22 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 21 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 20 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 19 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a4, 18 +; CHECK-NEXT: mul a1, a1, a4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a4, a1, 4 +; CHECK-NEXT: add a1, a4, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v2, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -1202,7 +1262,67 @@ ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v1 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1213,6 +1333,18 @@ define @icmp_eq_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_nxv128i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -1233,6 +1365,16 @@ ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i8 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1243,6 +1385,18 @@ define @icmp_eq_vx_swap_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_nxv128i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -1263,6 +1417,16 @@ ; CHECK-NEXT: vmseq.vx v16, v8, a0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v25 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i8 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2383,9 +2547,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 30 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 29 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 28 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 27 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: 
vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 @@ -2428,7 +2681,96 @@ ; CHECK-NEXT: vslideup.vx v16, v1, a1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: 
add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -2439,6 +2781,18 @@ define @icmp_eq_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: srli a2, a3, 2 @@ -2462,6 +2816,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -2472,6 +2836,18 @@ define @icmp_eq_vx_swap_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: srli a2, a3, 2 @@ -2495,6 +2871,16 @@ ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vx v16, v25, a2 ; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -3289,9 +3675,23 @@ define @icmp_eq_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmseq.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"eq", %m, i32 %evl) ret %v @@ -3302,6 +3702,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3310,6 +3754,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmseq.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; 
RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3330,6 +3817,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3338,6 +3869,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmseq.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, 
(a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3382,9 +3956,23 @@ define @icmp_ne_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_ne_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmsne.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"ne", %m, i32 %evl) ret %v @@ -3395,6 +3983,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3403,6 +4035,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsne.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; 
RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3423,6 +4098,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3431,6 +4150,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsne.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 
+; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3475,9 +4237,23 @@ define @icmp_ugt_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_ugt_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmsltu.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"ugt", %m, i32 %evl) ret %v @@ -3488,6 +4264,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: 
addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3496,6 +4316,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3516,6 +4379,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # 
Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3524,6 +4431,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3568,9 +4518,23 @@ define @icmp_uge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_uge_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmsleu.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"uge", %m, i32 %evl) ret %v @@ -3581,6 +4545,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # 
Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3589,16 +4597,149 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsleu.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_uge_vx_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: 
csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmsleu.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -3611,6 +4752,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded 
Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3619,6 +4804,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsleu.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3663,9 +4891,23 @@ define @icmp_ult_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_ult_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmsltu.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"ult", %m, i32 %evl) ret %v @@ -3676,6 +4918,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3684,6 +4970,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsltu.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3704,6 +5033,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3712,6 +5085,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsltu.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -3756,9 +5172,23 @@
 define <vscale x 8 x i1> @icmp_sgt_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: icmp_sgt_vv_nxv8i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmslt.vv v24, v16, v8, v0.t
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 8 x i1> @llvm.vp.icmp.nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, metadata !"sgt", <vscale x 8 x i1> %m, i32 %evl)
 ret <vscale x 8 x i1> %v
@@ -3769,6 +5199,50 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -3777,6 +5251,49 @@
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t
 ; RV32-NEXT: vmv1r.v v0, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+;
RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3797,6 +5314,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3805,6 +5366,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload 
+; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3849,9 +5453,23 @@ define @icmp_sge_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sge_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"sge", %m, i32 %evl) ret %v @@ -3862,6 +5480,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: 
addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3870,16 +5532,149 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: icmp_sge_vx_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v 
v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -3892,6 +5687,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; 
RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3900,6 +5739,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3944,9 +5826,23 @@ define @icmp_slt_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_slt_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmslt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"slt", %m, i32 %evl) ret %v @@ -3957,6 +5853,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; 
RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3965,6 +5905,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmslt.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -3985,6 +5968,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 
+; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -3993,6 +6020,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmslt.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -4037,9 +6107,23 @@ define @icmp_sle_vv_nxv8i64( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_sle_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmsle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: 
ret %v = call @llvm.vp.icmp.nxv8i64( %va, %vb, metadata !"sle", %m, i32 %evl) ret %v @@ -4050,6 +6134,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -4058,6 +6186,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsle.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi 
sp, sp, 16 ; RV32-NEXT: ret ; @@ -4078,6 +6249,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -4086,16 +6301,149 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: 
icmp_sle_vx_swap_nxv8i64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.v.x v24, a0 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmsle.vv v16, v24, v8, v0.t ; RV64-NEXT: vmv1r.v v0, v16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement poison, i64 %b, i32 0 %vb = shufflevector %elt.head, 
poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll --- a/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-integer.ll @@ -3229,6 +3229,14 @@ define <vscale x 16 x i1> @icmp_eq_vi_nx16i64(<vscale x 16 x i64> %va) { ; CHECK-LABEL: icmp_eq_vi_nx16i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: add a1, a0, a0 @@ -3237,6 +3245,12 @@ ; CHECK-NEXT: vmseq.vi v0, v8, 0 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vx v0, v24, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %vc = icmp eq <vscale x 16 x i64> %va, zeroinitializer ret <vscale x 16 x i1> %vc diff --git a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll --- a/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shuffle-reverse.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+f,+zfh,+zvfh,+d -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+f,+zfh,+zvfh,+d -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+f,+zfh,+zvfh,+d -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+f,+zfh,+zvfh,+d -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 define <2 x i8> @v2i8(<2 x i8> %a) { ; CHECK-LABEL: v2i8: ; CHECK: # %bb.0: @@ -368,6 +368,29 @@ define <16 x i32> @v8i32_2(<8 x i32> %a, <8 x i32> %b) { ; CHECK-LABEL: v8i32_2: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vid.v v20 @@ -380,6 +403,27 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi
a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v16i32 = shufflevector <8 x i32> %a, <8 x i32> %b, <16 x i32> ret <16 x i32> %v16i32 @@ -401,6 +445,93 @@ define <32 x i32> @v16i32_2(<16 x i32> %a, <16 x i32> %b) { ; RV32-LABEL: v16i32_2: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, 
a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: lui a0, %hi(.LCPI23_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI23_0) ; RV32-NEXT: li a1, 32 @@ -417,10 +548,182 @@ ; RV32-NEXT: vmv.v.x v0, a0 ; RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 14 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 13 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: v16i32_2: ; 
RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV64-NEXT: lui a0, %hi(.LCPI23_0) ; RV64-NEXT: addi a0, a0, %lo(.LCPI23_0) ; RV64-NEXT: li a1, 32 @@ -437,6 +740,91 @@ ; RV64-NEXT: vmv.v.x v0, a0 ; RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb 
+; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %v32i32 = shufflevector <16 x i32> %a, <16 x i32> %b, <32 x i32> ret <32 x i32> %v32i32 @@ -511,6 +899,29 @@ ; ; RV64-LABEL: v4i64_2: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vmv2r.v v16, v10 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vid.v v20 @@ -522,6 +933,27 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %v8i64 = shufflevector <4 x i64> %a, <4 x i64> %b, <8 x i32> ret <8 x i64> %v8i64 @@ -719,6 +1151,29 @@ define <16 x float> @v8f32_2(<8 x float> %a, <8 x float> %b) { ; CHECK-LABEL: v8f32_2: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v16, v10 ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vid.v v20 @@ -731,6 +1186,27 @@ ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vrgather.vv v12, v16, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v16f32 = shufflevector <8 x float> %a, <8 x float> %b, <16 x i32> ret <16 x float> %v16f32 @@ -805,6 +1281,29 @@ ; ; RV64-LABEL: v4f64_2: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 
+; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; RV64-NEXT: vmv2r.v v16, v10 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vid.v v20 @@ -816,6 +1315,27 @@ ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vv v12, v16, v8, v0.t ; RV64-NEXT: vmv.v.v v8, v12 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %v8f64 = shufflevector <4 x double> %a, <4 x double> %b, <8 x i32> ret <8 x double> %v8f64 diff --git a/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/smulo-sdnode.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s declare { <vscale x 1 x i8>, <vscale x 1 x i1> } @llvm.smul.with.overflow.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>) @@ -120,12 +120,102 @@ define <vscale x 64 x i8> @smulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y) { ; CHECK-LABEL: smulo_nxv64i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT:
slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmulh.vv v24, v8, v16 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vsra.vi v16, v8, 7 ; CHECK-NEXT: vmsne.vv v0, v24, v16 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.smul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y) %b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0 @@ -234,12 +324,102 @@ define <vscale x 32 x i16> @smulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y) { ; CHECK-LABEL: smulo_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +;
CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vmulh.vv v24, v8, v16 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vsra.vi v16, v8, 15 ; CHECK-NEXT: vmsne.vv v0, v24, v16 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.smul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y) %b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0 @@ -329,12 +509,102 @@ define <vscale x 16 x i32> @smulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) { ; CHECK-LABEL: smulo_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT:
mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vmulh.vv v24, v8, v16 ; CHECK-NEXT: vmul.vv v8, v8, v16 ; CHECK-NEXT: vsra.vi v16, v8, 31 ; CHECK-NEXT: vmsne.vv v0, v24, v16 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.smul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) %b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0 @@ -408,6 +678,52 @@ define <vscale x 8 x i64> @smulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) { ; CHECK-LABEL: smulo_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1,
a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmulh.vv v24, v8, v16 ; CHECK-NEXT: vmul.vv v8, v8, v16 @@ -415,6 +731,50 @@ ; CHECK-NEXT: vsra.vx v16, v8, a0 ; CHECK-NEXT: vmsne.vv v0, v24, v16 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.smul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -647,18 +647,62 @@ define <vscale x 16 x i64> @mul_bigimm_stepvector_nxv16i64() { ; RV32-LABEL:
mul_bigimm_stepvector_nxv16i64: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 8 * vlenb +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; RV32-NEXT: li a0, 7 -; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: sw a0, 28(sp) ; RV32-NEXT: lui a0, 797989 ; RV32-NEXT: addi a0, a0, -683 -; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw a0, 24(sp) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: lui a1, 11557 ; RV32-NEXT: addi a1, a1, -683 ; RV32-NEXT: mul a1, a0, a1 -; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: srli a0, a0, 3 ; RV32-NEXT: li a1, 62 ; RV32-NEXT: mul a1, a0, a1 @@ -666,16 +710,59 @@ ; RV32-NEXT: addi a2, a2, -1368 ; RV32-NEXT: mulhu a0, a0, a2 ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: sw a0, 20(sp) +; RV32-NEXT: addi a0, sp, 24 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a0), zero -; RV32-NEXT: mv a0, sp +; RV32-NEXT: addi a0, sp, 16 ; RV32-NEXT: vlse64.v v16, (a0), zero ; RV32-NEXT: vid.v v24 ; RV32-NEXT: vmul.vv v8, v24, v8 ; RV32-NEXT: vadd.vv v16, v8, v16 -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v27, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 32 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 32 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; ; RV64-LABEL: mul_bigimm_stepvector_nxv16i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -899,6 +899,52 @@ ; CHECK-RV32-NEXT: # %bb.5: ; CHECK-RV32-NEXT: mv a3, a5 ; CHECK-RV32-NEXT: .LBB44_6: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: slli a7, a7, 3 +; CHECK-RV32-NEXT: sub sp, sp, a7 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: slli t0, a7, 3 +; CHECK-RV32-NEXT: sub a7, t0, a7 +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v24, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: li t0, 6 +; CHECK-RV32-NEXT: mul a7, a7, t0 +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v25, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: slli t0, a7, 2 +; CHECK-RV32-NEXT: add a7, t0, a7 +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v26, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: slli a7, a7, 2 +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v27, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: slli t0, a7, 1 +; CHECK-RV32-NEXT: add a7, t0, a7 +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v28, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: slli a7, a7, 1 +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v29, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a7, vlenb +; CHECK-RV32-NEXT: add a7, sp, a7 +; CHECK-RV32-NEXT: addi a7, a7, 16 +; CHECK-RV32-NEXT: vs1r.v v30, (a7) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: addi a7, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v31, (a7) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: srli a5, a5, 2 ; CHECK-RV32-NEXT: vsetvli a7, zero, e8, mf2, ta, ma ; CHECK-RV32-NEXT: vslidedown.vx v0, v8, a5 @@ -910,6 +956,50 @@ ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV32-NEXT: vs1r.v v24, (a4) +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi 
a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: addi sp, sp, 16 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_nxv17f64: @@ -946,6 +1036,52 @@ ; CHECK-RV64-NEXT: # %bb.5: ; CHECK-RV64-NEXT: mv a2, a5 ; CHECK-RV64-NEXT: .LBB44_6: +; CHECK-RV64-NEXT: addi sp, sp, -16 +; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: slli a7, a7, 3 +; CHECK-RV64-NEXT: sub sp, sp, a7 +; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: slli t0, a7, 3 +; CHECK-RV64-NEXT: sub a7, t0, a7 +; CHECK-RV64-NEXT: add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v24, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: li t0, 6 +; CHECK-RV64-NEXT: mul a7, a7, t0 +; CHECK-RV64-NEXT: add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v25, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: slli t0, a7, 2 +; CHECK-RV64-NEXT: add a7, t0, a7 +; CHECK-RV64-NEXT: add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v26, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: slli a7, a7, 2 +; CHECK-RV64-NEXT: add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v27, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: slli t0, a7, 1 +; CHECK-RV64-NEXT: add a7, t0, a7 +; CHECK-RV64-NEXT: add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v28, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: slli a7, a7, 1 +; CHECK-RV64-NEXT: add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v29, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a7, vlenb +; CHECK-RV64-NEXT: 
add a7, sp, a7 +; CHECK-RV64-NEXT: addi a7, a7, 16 +; CHECK-RV64-NEXT: vs1r.v v30, (a7) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: addi a7, sp, 16 +; CHECK-RV64-NEXT: vs1r.v v31, (a7) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: srli a5, a5, 2 ; CHECK-RV64-NEXT: vsetvli a7, zero, e8, mf2, ta, ma ; CHECK-RV64-NEXT: vslidedown.vx v0, v8, a5 @@ -957,6 +1093,50 @@ ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vlse64.v v8, (a0), a1, v0.t ; CHECK-RV64-NEXT: vs1r.v v24, (a3) +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 3 +; CHECK-RV64-NEXT: sub a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 6 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 2 +; CHECK-RV64-NEXT: add a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 2 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 1 +; CHECK-RV64-NEXT: add a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: add sp, sp, a0 +; CHECK-RV64-NEXT: addi sp, sp, 16 ; CHECK-RV64-NEXT: ret %v = call <vscale x 17 x double> @llvm.experimental.vp.strided.load.nxv17f64.p0.i64(ptr %ptr, i64 %stride, <vscale x 17 x i1> %mask, i32 %evl) %lo = call <vscale x 16 x double> @llvm.experimental.vector.extract.nxv16f64(<vscale x 17 x double> %v, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpstore.ll @@ -720,6 +720,59 @@ define void @strided_store_nxv17f64(<vscale x 17 x double> %v, ptr %ptr, i32 signext %stride, <vscale x 17 x i1> %mask, i32 zeroext %evl) { ; CHECK-RV32-LABEL: strided_store_nxv17f64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a4, a4, 4 +; CHECK-RV32-NEXT: sub sp, sp, a4 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: slli a5, a4, 4 +; CHECK-RV32-NEXT: sub a4, a5, a4 +; CHECK-RV32-NEXT: add a4, sp, a4 +; CHECK-RV32-NEXT: addi a4, a4, 16 +; CHECK-RV32-NEXT: vs1r.v v1, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a4, vlenb +; CHECK-RV32-NEXT: li a5, 14 +; CHECK-RV32-NEXT: mul a4, a4, a5 +;
CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v2, (a4) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: li a5, 13
+; CHECK-RV32-NEXT: mul a4, a4, a5
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v3, (a4) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: li a5, 12
+; CHECK-RV32-NEXT: mul a4, a4, a5
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v4, (a4) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: li a5, 11
+; CHECK-RV32-NEXT: mul a4, a4, a5
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v5, (a4) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: li a5, 10
+; CHECK-RV32-NEXT: mul a4, a4, a5
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v6, (a4) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: slli a5, a4, 3
+; CHECK-RV32-NEXT: add a4, a5, a4
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v7, (a4) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a4, vlenb
+; CHECK-RV32-NEXT: slli a4, a4, 3
+; CHECK-RV32-NEXT: add a4, sp, a4
+; CHECK-RV32-NEXT: addi a4, a4, 16
+; CHECK-RV32-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill
 ; CHECK-RV32-NEXT: csrr a4, vlenb
 ; CHECK-RV32-NEXT: slli a6, a4, 1
 ; CHECK-RV32-NEXT: vmv1r.v v24, v0
@@ -733,12 +786,6 @@
 ; CHECK-RV32-NEXT: # %bb.3:
 ; CHECK-RV32-NEXT: mv a7, a4
 ; CHECK-RV32-NEXT: .LBB36_4:
-; CHECK-RV32-NEXT: addi sp, sp, -16
-; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV32-NEXT: csrr t0, vlenb
-; CHECK-RV32-NEXT: slli t0, t0, 3
-; CHECK-RV32-NEXT: sub sp, sp, t0
-; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-RV32-NEXT: vl8re64.v v0, (a0)
 ; CHECK-RV32-NEXT: addi a0, sp, 16
 ; CHECK-RV32-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
@@ -774,13 +821,113 @@
 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-RV32-NEXT: vsse64.v v8, (a1), a2, v0.t
 ; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a1, a0, 4
+; CHECK-RV32-NEXT: sub a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: li a1, 14
+; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: li a1, 13
+; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: li a1, 12
+; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: li a1, 11
+; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: li a1, 10
+; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a1, a0, 3
+; CHECK-RV32-NEXT: add a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
 ; CHECK-RV32-NEXT: slli a0, a0, 3
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 4
 ; CHECK-RV32-NEXT: add sp, sp, a0
 ; CHECK-RV32-NEXT: addi sp, sp, 16
 ; CHECK-RV32-NEXT: ret
 ;
 ; CHECK-RV64-LABEL: strided_store_nxv17f64:
 ; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: addi sp, sp, -16
+; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: slli a4, a4, 4
+; CHECK-RV64-NEXT: sub sp, sp, a4
+; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: slli a5, a4, 4
+; CHECK-RV64-NEXT: sub a4, a5, a4
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v1, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: li a5, 14
+; CHECK-RV64-NEXT: mul a4, a4, a5
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v2, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: li a5, 13
+; CHECK-RV64-NEXT: mul a4, a4, a5
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v3, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: li a5, 12
+; CHECK-RV64-NEXT: mul a4, a4, a5
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v4, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: li a5, 11
+; CHECK-RV64-NEXT: mul a4, a4, a5
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v5, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: li a5, 10
+; CHECK-RV64-NEXT: mul a4, a4, a5
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v6, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: slli a5, a4, 3
+; CHECK-RV64-NEXT: add a4, a5, a4
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v7, (a4) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a4, vlenb
+; CHECK-RV64-NEXT: slli a4, a4, 3
+; CHECK-RV64-NEXT: add a4, sp, a4
+; CHECK-RV64-NEXT: addi a4, a4, 16
+; CHECK-RV64-NEXT: vs1r.v v24, (a4) # Unknown-size Folded Spill
 ; CHECK-RV64-NEXT: csrr a4, vlenb
 ; CHECK-RV64-NEXT: slli a6, a4, 1
 ; CHECK-RV64-NEXT: vmv1r.v v24, v0
@@ -794,12 +941,6 @@
 ; CHECK-RV64-NEXT: # %bb.3:
 ; CHECK-RV64-NEXT: mv a7, a4
 ; CHECK-RV64-NEXT: .LBB36_4:
-; CHECK-RV64-NEXT: addi sp, sp, -16
-; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
-; CHECK-RV64-NEXT: csrr t0, vlenb
-; CHECK-RV64-NEXT: slli t0, t0, 3
-; CHECK-RV64-NEXT: sub sp, sp, t0
-; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-RV64-NEXT: vl8re64.v v0, (a0)
 ; CHECK-RV64-NEXT: addi a0, sp, 16
 ; CHECK-RV64-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
@@ -835,7 +976,54 @@
 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-RV64-NEXT: vsse64.v v8, (a1), a2, v0.t
 ; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a1, a0, 4
+; CHECK-RV64-NEXT: sub a0, a1, a0
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: li a1, 14
+; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: li a1, 13
+; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: li a1, 12
+; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: li a1, 11
+; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: li a1, 10
+; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a1, a0, 3
+; CHECK-RV64-NEXT: add a0, a1, a0
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
 ; CHECK-RV64-NEXT: slli a0, a0, 3
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a0, a0, 4
 ; CHECK-RV64-NEXT: add sp, sp, a0
 ; CHECK-RV64-NEXT: addi sp, sp, 16
 ; CHECK-RV64-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s
 declare { <vscale x 1 x i8>, <vscale x 1 x i1> } @llvm.umul.with.overflow.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>)
@@ -114,11 +114,101 @@
 define { <vscale x 64 x i8>, <vscale x 64 x i1> } @umulo_nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y) {
 ; CHECK-LABEL: umulo_nxv64i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vmulhu.vv v24, v8, v16
 ; CHECK-NEXT: vmsne.vi v0, v24, 0
 ; CHECK-NEXT: vmul.vv v8, v8, v16
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %a = call { <vscale x 64 x i8>, <vscale x 64 x i1> } @llvm.umul.with.overflow.nxv64i8(<vscale x 64 x i8> %x, <vscale x 64 x i8> %y)
 %b = extractvalue { <vscale x 64 x i8>, <vscale x 64 x i1> } %a, 0
@@ -222,11 +312,101 @@
 define { <vscale x 32 x i16>, <vscale x 32 x i1> } @umulo_nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y) {
 ; CHECK-LABEL: umulo_nxv32i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vmulhu.vv v24, v8, v16
 ; CHECK-NEXT: vmsne.vi v0, v24, 0
 ; CHECK-NEXT: vmul.vv v8, v8, v16
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %a = call { <vscale x 32 x i16>, <vscale x 32 x i1> } @llvm.umul.with.overflow.nxv32i16(<vscale x 32 x i16> %x, <vscale x 32 x i16> %y)
 %b = extractvalue { <vscale x 32 x i16>, <vscale x 32 x i1> } %a, 0
@@ -312,11 +492,101 @@
 define { <vscale x 16 x i32>, <vscale x 16 x i1> } @umulo_nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y) {
 ; CHECK-LABEL: umulo_nxv16i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vmulhu.vv v24, v8, v16
 ; CHECK-NEXT: vmsne.vi v0, v24, 0
 ; CHECK-NEXT: vmul.vv v8, v8, v16
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %a = call { <vscale x 16 x i32>, <vscale x 16 x i1> } @llvm.umul.with.overflow.nxv16i32(<vscale x 16 x i32> %x, <vscale x 16 x i32> %y)
 %b = extractvalue { <vscale x 16 x i32>, <vscale x 16 x i1> } %a, 0
@@ -384,11 +654,101 @@
 define { <vscale x 8 x i64>, <vscale x 8 x i1> } @umulo_nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y) {
 ; CHECK-LABEL: umulo_nxv8i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vmulhu.vv v24, v8, v16
 ; CHECK-NEXT: vmsne.vi v0, v24, 0
 ; CHECK-NEXT: vmul.vv v8, v8, v16
 ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %a = call { <vscale x 8 x i64>, <vscale x 8 x i1> } @llvm.umul.with.overflow.nxv8i64(<vscale x 8 x i64> %x, <vscale x 8 x i64> %y)
 %b = extractvalue { <vscale x 8 x i64>, <vscale x 8 x i1> } %a, 0
diff --git a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/unmasked-tu.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vle.nxv1i8(
@@ -597,9 +597,51 @@
 define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x half> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl4re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, tu, ma
 ; CHECK-NEXT: vfwsub.wv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
@@ -2492,6 +2534,49 @@
 ; RV32-LABEL: intrinsic_vmerge_vxm_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -2499,6 +2584,49 @@
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma
 ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
@@ -2522,6 +2650,49 @@
 ; RV32-LABEL: intrinsic_vmerge_vim_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: sub sp, sp, a1
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 3
+; RV32-NEXT: sub a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: li a2, 6
+; RV32-NEXT: mul a1, a1, a2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 2
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 2
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a2, a1, 1
+; RV32-NEXT: add a1, a2, a1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT: li a1, 15
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: li a1, -1
@@ -2531,6 +2702,49 @@
 ; RV32-NEXT: vlse64.v v24, (a1), zero
 ; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma
 ; RV32-NEXT: vmerge.vvm v8, v16, v24, v0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaadd.ll b/llvm/test/CodeGen/RISCV/rvv/vaadd.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vaadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaadd.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vaadd.nxv1i8.nxv1i8(
 <vscale x 1 x i8>,
@@ -324,10 +324,98 @@
 define <vscale x 64 x i8> @intrinsic_vaadd_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 0
 ; CHECK-NEXT: vaadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i8> @llvm.riscv.vaadd.mask.nxv64i8.nxv64i8(
@@ -613,10 +701,98 @@
 define <vscale x 32 x i16> @intrinsic_vaadd_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 0
 ; CHECK-NEXT: vaadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 32 x i16> @llvm.riscv.vaadd.mask.nxv32i16.nxv32i16(
@@ -854,10 +1030,98 @@
 define <vscale x 16 x i32> @intrinsic_vaadd_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 0
 ; CHECK-NEXT: vaadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i32> @llvm.riscv.vaadd.mask.nxv16i32.nxv16i32(
@@ -1047,10 +1311,98 @@
 define <vscale x 8 x i64> @intrinsic_vaadd_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaadd_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 0
 ; CHECK-NEXT: vaadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i64> @llvm.riscv.vaadd.mask.nxv8i64.nxv8i64(
@@ -2196,6 +2548,49 @@
 ; RV32-LABEL: intrinsic_vaadd_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -2203,6 +2598,49 @@
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: csrwi vxrm, 0
 ; RV32-NEXT: vaadd.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll b/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vaaddu.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vaaddu.nxv1i8.nxv1i8(
 <vscale x 1 x i8>,
@@ -324,10 +324,98 @@
 define <vscale x 64 x i8> @intrinsic_vaaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 1
 ; CHECK-NEXT: vaaddu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i8> @llvm.riscv.vaaddu.mask.nxv64i8.nxv64i8(
@@ -613,10 +701,98 @@
 define <vscale x 32 x i16> @intrinsic_vaaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 1
 ; CHECK-NEXT: vaaddu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 32 x i16> @llvm.riscv.vaaddu.mask.nxv32i16.nxv32i16(
@@ -854,10 +1030,98 @@
 define <vscale x 16 x i32> @intrinsic_vaaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 1
 ; CHECK-NEXT: vaaddu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i32> @llvm.riscv.vaaddu.mask.nxv16i32.nxv16i32(
@@ -1047,10 +1311,98 @@
 define <vscale x 8 x i64> @intrinsic_vaaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vaaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: csrwi vxrm, 1
 ; CHECK-NEXT: vaaddu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i64> @llvm.riscv.vaaddu.mask.nxv8i64.nxv8i64(
@@ -2196,6 +2548,49 @@
 ; RV32-LABEL: intrinsic_vaaddu_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -2203,6 +2598,49 @@
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: csrwi vxrm, 1
 ; RV32-NEXT: vaaddu.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll
@@ -645,6 +645,14 @@
 define <vscale x 128 x i8> @vadd_vi_nxv128i8(<vscale x 128 x i8> %va, <vscale x 128 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vadd_vi_nxv128i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v0, (a0)
@@ -663,6 +671,12 @@
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 128 x i8> poison, i8 -1, i32 0
 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1539,6 +1553,14 @@
 define <vscale x 32 x i32> @vadd_vi_nxv32i32(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vadd_vi_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
@@ -1558,6 +1580,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -1601,6 +1629,14 @@
 define <vscale x 32 x i32> @vadd_vi_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, <vscale x 32 x i1> %m) {
 ; CHECK-LABEL: vadd_vi_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: srli a1, a0, 2
@@ -1620,6 +1656,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vadd.vi v8, v8, -1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 -1,
i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1648,6 +1690,14 @@ ; ; RV64-LABEL: vadd_vi_nxv32i32_evl_nx16: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a1, a0, 2 ; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma @@ -1658,6 +1708,12 @@ ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement poison, i32 -1, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd.ll b/llvm/test/CodeGen/RISCV/rvv/vadd.ll --- a/llvm/test/CodeGen/RISCV/rvv/vadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vadd.nxv1i8.nxv1i8( , @@ -311,9 +311,97 @@ define @intrinsic_vadd_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) 
# Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vadd.mask.nxv64i8.nxv64i8( @@ -588,9 +676,97 @@ define @intrinsic_vadd_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size 
Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vadd.mask.nxv32i16.nxv32i16( @@ -819,9 +995,97 @@ define @intrinsic_vadd_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded 
Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vadd.mask.nxv16i32.nxv16i32( @@ -1004,9 +1268,97 @@ define @intrinsic_vadd_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vadd_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; 
CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vadd.mask.nxv8i64.nxv8i64( @@ -2145,12 +2497,98 @@ ; RV32-LABEL: intrinsic_vadd_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), 
zero ; RV32-NEXT: vadd.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vand.ll b/llvm/test/CodeGen/RISCV/rvv/vand.ll --- a/llvm/test/CodeGen/RISCV/rvv/vand.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vand.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vand.nxv1i8.nxv1i8( , @@ -318,9 +318,97 @@ define @intrinsic_vand_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add 
a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vand.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vand.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ define @intrinsic_vand_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; 
CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vand.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vand.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define @intrinsic_vand_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; 
CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vand.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vand.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define @intrinsic_vand_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vand_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: 
addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vand.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vand.mask.nxv8i64.nxv8i64( @@ -2146,12 +2498,98 @@ ; RV32-LABEL: intrinsic_vand_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: 
add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vand.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vandn.ll b/llvm/test/CodeGen/RISCV/rvv/vandn.ll --- a/llvm/test/CodeGen/RISCV/rvv/vandn.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vandn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vandn.nxv1i8.nxv1i8( @@ -319,9 +319,97 @@ define @intrinsic_vandn_mask_vv_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vandn_mask_vv_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, 
a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vandn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vandn.mask.nxv64i8.nxv64i8( @@ -602,9 +690,97 @@ define @intrinsic_vandn_mask_vv_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vandn_mask_vv_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v 
v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vandn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vandn.mask.nxv32i16.nxv32i16( @@ -838,9 +1014,97 @@ define @intrinsic_vandn_mask_vv_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vandn_mask_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vandn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vandn.mask.nxv16i32.nxv16i32( @@ -1027,9 +1291,97 @@ define @intrinsic_vandn_mask_vv_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vandn_mask_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill 
+; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vandn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vandn.mask.nxv8i64.nxv8i64( @@ -2147,12 +2499,98 @@ ; RV32-LABEL: intrinsic_vandn_mask_vx_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi 
a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vandn.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vasub.ll b/llvm/test/CodeGen/RISCV/rvv/vasub.ll --- a/llvm/test/CodeGen/RISCV/rvv/vasub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasub.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vasub.nxv1i8.nxv1i8( , @@ -324,10 +324,98 @@ define @intrinsic_vasub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vasub_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasub.mask.nxv64i8.nxv64i8( @@ -613,10 +701,98 @@ define @intrinsic_vasub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vasub_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasub.mask.nxv32i16.nxv32i16( @@ -854,10 +1030,98 @@ define @intrinsic_vasub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: 
intrinsic_vasub_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasub.mask.nxv16i32.nxv16i32( @@ -1047,10 +1311,98 @@ define @intrinsic_vasub_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) 
nounwind { ; CHECK-LABEL: intrinsic_vasub_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasub.mask.nxv8i64.nxv8i64( @@ -2196,6 +2548,49 @@ ; RV32-LABEL: intrinsic_vasub_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: 
# %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2203,6 +2598,49 @@ ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: csrwi vxrm, 1 ; RV32-NEXT: vasub.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vasubu.ll b/llvm/test/CodeGen/RISCV/rvv/vasubu.ll --- a/llvm/test/CodeGen/RISCV/rvv/vasubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vasubu.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by 
utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vasubu.nxv1i8.nxv1i8( , @@ -324,10 +324,98 @@ define @intrinsic_vasubu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 
16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasubu.mask.nxv64i8.nxv64i8( @@ -613,10 +701,98 @@ define @intrinsic_vasubu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasubu.mask.nxv32i16.nxv32i16( @@ -854,10 +1030,98 @@ define @intrinsic_vasubu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasubu.mask.nxv16i32.nxv16i32( @@ -1047,10 +1311,98 @@ define @intrinsic_vasubu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vasubu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 1 ; CHECK-NEXT: vasubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vasubu.mask.nxv8i64.nxv8i64( @@ -2196,6 +2548,49 @@ ; RV32-LABEL: intrinsic_vasubu_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2203,6 +2598,49 @@ ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: csrwi vxrm, 1 ; RV32-NEXT: vasubu.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: 
vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vclmul.ll b/llvm/test/CodeGen/RISCV/rvv/vclmul.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vclmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vclmul.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvbc \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbc \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvbc \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbc \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

 declare @llvm.riscv.vclmul.nxv1i64.nxv1i64(
@@ -38,7 +38,7 @@
 ; CHECK-LABEL: intrinsic_vclmul_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vclmul.vv v8, v9, v10
+; CHECK-NEXT: vclmul.vv v8, v9, v10, v0.t
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmul.mask.nxv1i64.nxv1i64(
@@ -85,7 +85,7 @@
 ; CHECK-LABEL: intrinsic_vclmul_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vclmul.vv v8, v10, v12
+; CHECK-NEXT: vclmul.vv v8, v10, v12, v0.t
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmul.mask.nxv2i64.nxv2i64(
@@ -132,7 +132,7 @@
 ; CHECK-LABEL: intrinsic_vclmul_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vclmul.vv v8, v12, v16
+; CHECK-NEXT: vclmul.vv v8, v12, v16, v0.t
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmul.mask.nxv4i64.nxv4i64(
@@ -178,9 +178,97 @@
 define @intrinsic_vclmul_mask_vv_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vclmul_mask_vv_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+;
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu -; CHECK-NEXT: vclmul.vv v8, v16, v24 +; CHECK-NEXT: vclmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vclmul.mask.nxv8i64.nxv8i64( @@ -244,14 +332,14 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v10, (a0), zero -; RV32-NEXT: vclmul.vv v8, v9, v10 +; RV32-NEXT: vclmul.vv v8, v9, v10, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vclmul_mask_vx_nxv1i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu -; RV64-NEXT: vclmul.vx v8, v9, a0 +; RV64-NEXT: vclmul.vx v8, v9, a0, v0.t ; RV64-NEXT: ret entry: %a = call @llvm.riscv.vclmul.mask.nxv1i64.i64( @@ -315,14 +403,14 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vclmul.vv v8, v10, v12 +; RV32-NEXT: vclmul.vv v8, v10, v12, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: intrinsic_vclmul_mask_vx_nxv2i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu -; RV64-NEXT: vclmul.vx v8, v10, a0 +; RV64-NEXT: vclmul.vx v8, v10, a0, v0.t ; RV64-NEXT: ret entry: %a = call @llvm.riscv.vclmul.mask.nxv2i64.i64( @@ -386,14 +474,14 @@ ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vclmul.vv v8, v12, v16 +; RV32-NEXT: vclmul.vv v8, v12, v16, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: 
intrinsic_vclmul_mask_vx_nxv4i64_i64: ; RV64: # %bb.0: # %entry ; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu -; RV64-NEXT: vclmul.vx v8, v12, a0 +; RV64-NEXT: vclmul.vx v8, v12, a0, v0.t ; RV64-NEXT: ret entry: %a = call @llvm.riscv.vclmul.mask.nxv4i64.i64( @@ -452,19 +540,105 @@ ; RV32-LABEL: intrinsic_vclmul_mask_vx_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero -; RV32-NEXT: vclmul.vv v8, v16, v24 +; RV32-NEXT: vclmul.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr 
a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: intrinsic_vclmul_mask_vx_nxv8i64_i64:
 ; RV64: # %bb.0: # %entry
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
-; RV64-NEXT: vclmul.vx v8, v16, a0
+; RV64-NEXT: vclmul.vx v8, v16, a0, v0.t
 ; RV64-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmul.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll b/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vclmulh.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvbc \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbc \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvbc \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbc \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64

 declare @llvm.riscv.vclmulh.nxv1i64.nxv1i64(
@@ -38,7 +38,7 @@
 ; CHECK-LABEL: intrinsic_vclmulh_mask_vv_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, mu
-; CHECK-NEXT: vclmulh.vv v8, v9, v10
+; CHECK-NEXT: vclmulh.vv v8, v9, v10, v0.t
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmulh.mask.nxv1i64.nxv1i64(
@@ -85,7 +85,7 @@
 ; CHECK-LABEL: intrinsic_vclmulh_mask_vv_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, mu
-; CHECK-NEXT: vclmulh.vv v8, v10, v12
+; CHECK-NEXT: vclmulh.vv v8, v10, v12, v0.t
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmulh.mask.nxv2i64.nxv2i64(
@@ -132,7 +132,7 @@
 ; CHECK-LABEL: intrinsic_vclmulh_mask_vv_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, mu
-; CHECK-NEXT: vclmulh.vv v8, v12, v16
+; CHECK-NEXT: vclmulh.vv v8, v12, v16, v0.t
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vclmulh.mask.nxv4i64.nxv4i64(
@@ -178,9 +178,97 @@
 define @intrinsic_vclmulh_mask_vv_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vclmulh_mask_vv_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add
a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
-; CHECK-NEXT: vclmulh.vv v8, v16, v24
+; CHECK-NEXT: vclmulh.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 8 x i64> @llvm.riscv.vclmulh.mask.nxv8i64.nxv8i64(
@@ -244,14 +332,14 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu
; RV32-NEXT: vlse64.v v10, (a0), zero
-; RV32-NEXT: vclmulh.vv v8, v9, v10
+; RV32-NEXT: vclmulh.vv v8, v9, v10, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: intrinsic_vclmulh_mask_vx_nxv1i64_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetvli zero, a1, e64, m1, ta, mu
-; RV64-NEXT: vclmulh.vx v8, v9, a0
+; RV64-NEXT: vclmulh.vx v8, v9, a0, v0.t
; RV64-NEXT: ret
entry:
  %a = call <vscale x 1 x i64> @llvm.riscv.vclmulh.mask.nxv1i64.i64(
@@ -315,14 +403,14 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; RV32-NEXT: vlse64.v v12, (a0), zero
-; RV32-NEXT: vclmulh.vv v8, v10, v12
+; RV32-NEXT: vclmulh.vv v8, v10, v12, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: intrinsic_vclmulh_mask_vx_nxv2i64_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetvli zero, a1, e64, m2, ta, mu
-; RV64-NEXT: vclmulh.vx v8, v10, a0
+; RV64-NEXT: vclmulh.vx v8, v10, a0, v0.t
; RV64-NEXT: ret
entry:
  %a = call <vscale x 2 x i64> @llvm.riscv.vclmulh.mask.nxv2i64.i64(
@@ -386,14 +474,14 @@
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu
; RV32-NEXT: vlse64.v v16, (a0), zero
-; RV32-NEXT: vclmulh.vv v8, v12, v16
+; RV32-NEXT: vclmulh.vv v8, v12, v16, v0.t
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: intrinsic_vclmulh_mask_vx_nxv4i64_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetvli zero, a1, e64, m4, ta, mu
-; RV64-NEXT: vclmulh.vx v8, v12, a0
+; RV64-NEXT: vclmulh.vx v8, v12, a0, v0.t
; RV64-NEXT: ret
entry:
  %a = call <vscale x 4 x i64> @llvm.riscv.vclmulh.mask.nxv4i64.i64(
@@ -452,19 +540,105 @@
; RV32-LABEL: intrinsic_vclmulh_mask_vx_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
-; RV32-NEXT: vclmulh.vv v8, v16, v24
+; RV32-NEXT: vclmulh.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: intrinsic_vclmulh_mask_vx_nxv8i64_i64:
; RV64: # %bb.0: # %entry
; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, mu
-; RV64-NEXT: vclmulh.vx v8, v16, a0
+; RV64-NEXT: vclmulh.vx v8, v16, a0, v0.t
; RV64-NEXT: ret
entry:
  %a = call <vscale x 8 x i64> @llvm.riscv.vclmulh.mask.nxv8i64.i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdiv.ll b/llvm/test/CodeGen/RISCV/rvv/vdiv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdiv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdiv.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
declare <vscale x 1 x i8> @llvm.riscv.vdiv.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
@@ -318,9 +318,97 @@
define <vscale x 64 x i8> @intrinsic_vdiv_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdiv_mask_vv_nxv64i8_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vdiv.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 64 x i8> @llvm.riscv.vdiv.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@
define <vscale x 32 x i16> @intrinsic_vdiv_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdiv_mask_vv_nxv32i16_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vdiv.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 32 x i16> @llvm.riscv.vdiv.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@
define <vscale x 16 x i32> @intrinsic_vdiv_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdiv_mask_vv_nxv16i32_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vdiv.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 16 x i32> @llvm.riscv.vdiv.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@
define <vscale x 8 x i64> @intrinsic_vdiv_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdiv_mask_vv_nxv8i64_nxv8i64_nxv8i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vdiv.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 8 x i64> @llvm.riscv.vdiv.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
; RV32-LABEL: intrinsic_vdiv_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
; RV32-NEXT: vdiv.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vdivu.ll b/llvm/test/CodeGen/RISCV/rvv/vdivu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vdivu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vdivu.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
declare <vscale x 1 x i8> @llvm.riscv.vdivu.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
@@ -318,9 +318,97 @@
define <vscale x 64 x i8> @intrinsic_vdivu_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdivu_mask_vv_nxv64i8_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vdivu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 64 x i8> @llvm.riscv.vdivu.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@
define <vscale x 32 x i16> @intrinsic_vdivu_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdivu_mask_vv_nxv32i16_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vdivu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 32 x i16> @llvm.riscv.vdivu.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@
define <vscale x 16 x i32> @intrinsic_vdivu_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdivu_mask_vv_nxv16i32_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vdivu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 16 x i32> @llvm.riscv.vdivu.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@
define <vscale x 8 x i64> @intrinsic_vdivu_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vdivu_mask_vv_nxv8i64_nxv8i64_nxv8i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vdivu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 8 x i64> @llvm.riscv.vdivu.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
; RV32-LABEL: intrinsic_vdivu_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
; RV32-NEXT: vdivu.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll
@@ -104,10 +104,99 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 38
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 37
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 36
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 35
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 34
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 5
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 30
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 29
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 28
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 27
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 26
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 25
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, a0, a1
@@ -148,8 +237,97 @@
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv4r.v v20, v24
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 38
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 37
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 36
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 35
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 34
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 30
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 29
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 28
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 27
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll
@@ -94,9 +94,94 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v9, v0 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v24, 0 @@ -121,7 +206,92 @@ ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl1r.v v0, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v 
v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -132,6 +302,93 @@ define {, } @vector_deinterleave_nxv64i8_nxv128i8( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv64i8_nxv128i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v12, v16, 0 @@ -139,6 +396,91 @@ ; CHECK-NEXT: vnsrl.wi v4, v16, 8 ; CHECK-NEXT: vnsrl.wi v0, v24, 8 ; CHECK-NEXT: vmv8r.v v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %retval = call {, } @llvm.experimental.vector.deinterleave2.nxv128i8( %vec) ret {, } %retval @@ -147,250 +489,1208 @@ define {, } @vector_deinterleave_nxv32i16_nxv64i16( %vec) { ; CHECK-LABEL: vector_deinterleave_nxv32i16_nxv64i16: ; CHECK: # %bb.0: -; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma -; CHECK-NEXT: vnsrl.wi v12, v16, 0 -; CHECK-NEXT: vnsrl.wi v8, v24, 0 -; CHECK-NEXT: vnsrl.wi v4, v16, 16 -; CHECK-NEXT: vnsrl.wi v0, v24, 16 -; CHECK-NEXT: vmv8r.v v16, v0 -; CHECK-NEXT: ret -%retval = call {, } @llvm.experimental.vector.deinterleave2.nxv64i16( %vec) -ret {, } %retval -} - -define {, } @vector_deinterleave_nxv16i32_nxvv32i32( %vec) { -; CHECK-LABEL: vector_deinterleave_nxv16i32_nxvv32i32: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv8r.v v24, v16 -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wx v20, v24, a0 -; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v4, v24, 0 -; CHECK-NEXT: vnsrl.wi v0, v8, 0 -; CHECK-NEXT: vmv8r.v v8, v0 -; CHECK-NEXT: ret -%retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32i32( %vec) -ret {, } %retval -} - -define {, } @vector_deinterleave_nxv8i64_nxv16i64( %vec) { -; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64: -; CHECK: # %bb.0: ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi 
a0, a0, 16 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v16, 0 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 +; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vmv8r.v v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # 
Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+%retval = call {<vscale x 32 x i16>, <vscale x 32 x i16>} @llvm.experimental.vector.deinterleave2.nxv64i16(<vscale x 64 x i16> %vec)
+ret {<vscale x 32 x i16>, <vscale x 32 x i16>} %retval
+}
+
+define {<vscale x 16 x i32>, <vscale x 16 x i32>} @vector_deinterleave_nxv16i32_nxvv32i32(<vscale x 32 x i32> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv16i32_nxvv32i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size
Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wx v20, v24, a0 +; CHECK-NEXT: vnsrl.wx v16, v8, a0 +; CHECK-NEXT: vnsrl.wi v4, v24, 0 +; CHECK-NEXT: vnsrl.wi v0, v8, 0 +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # 
Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+%retval = call {<vscale x 16 x i32>, <vscale x 16 x i32>} @llvm.experimental.vector.deinterleave2.nxv32i32(<vscale x 32 x i32> %vec)
+ret {<vscale x 16 x i32>, <vscale x 16 x i32>} %retval
+}
+
+define {<vscale x 8 x i64>, <vscale x 8 x i64>} @vector_deinterleave_nxv8i64_nxv16i64(<vscale x 16 x i64> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 56
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 54
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 53
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 51
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 50
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 49
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 47
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 46
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+;
CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 45 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 44 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 43 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 41 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 +; CHECK-NEXT: vadd.vv v0, v8, v8 +; CHECK-NEXT: vrgather.vv v8, v24, v0 +; CHECK-NEXT: vrgather.vv v24, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vadd.vi v16, v0, 1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v0, v24, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vrgather.vv v16, v24, v0 +; CHECK-NEXT: vmv4r.v v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v12, v16 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv4r.v v20, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 54 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 53
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 52
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 51
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 50
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 49
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 47
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 46
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 45
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 44
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 43
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 42
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 41
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 56
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+%retval = call {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.experimental.vector.deinterleave2.nxv16i64(<vscale x 16 x i64> %vec)
+ret {<vscale x 8 x i64>, <vscale x 8 x i64>} %retval
+}
+
+declare {<vscale x 64 x i1>, <vscale x 64 x i1>} @llvm.experimental.vector.deinterleave2.nxv128i1(<vscale x 128 x i1>)
+declare {<vscale x 64 x i8>, <vscale x 64 x i8>} @llvm.experimental.vector.deinterleave2.nxv128i8(<vscale x 128 x i8>)
+declare {<vscale x 32 x i16>, <vscale x 32 x i16>} @llvm.experimental.vector.deinterleave2.nxv64i16(<vscale x 64 x i16>)
+declare {<vscale x 16 x i32>, <vscale x 16 x i32>} @llvm.experimental.vector.deinterleave2.nxv32i32(<vscale x 32 x i32>)
+declare {<vscale x 8 x i64>, <vscale x 8 x i64>} @llvm.experimental.vector.deinterleave2.nxv16i64(<vscale x 16 x i64>)
+
+; Floats
+
+define {<vscale x 2 x half>, <vscale x 2 x half>} @vector_deinterleave_nxv2f16_nxv4f16(<vscale x 4 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnsrl.wi v9, v8, 16
+; CHECK-NEXT: vmv1r.v v8, v10
+; CHECK-NEXT: ret
+%retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
+ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
+}
+
+define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-NEXT: vnsrl.wi v11, v8, 16
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: vmv.v.v v9, v11
+; CHECK-NEXT: ret
+%retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
+ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
+}
+
+define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vnsrl.wi v11, v8, 0
+; CHECK-NEXT: vmv.v.v v8, v11
+; CHECK-NEXT: vmv.v.v v9, v10
+; CHECK-NEXT: ret
+%retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
+ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
+}
+
+define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
+; CHECK-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-NEXT: vnsrl.wi v14, v8, 16
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: vmv.v.v v10, v14
+; CHECK-NEXT: ret
+%retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
+ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
+}
+
+define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
+; CHECK-NEXT: vnsrl.wi v14, v8, 0
+; CHECK-NEXT: vmv.v.v v8, v14
+; CHECK-NEXT: vmv.v.v v10, v12
+; CHECK-NEXT: ret
+%retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
+ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
+}
+
+define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
+; CHECK-NEXT: vid.v v12
+; CHECK-NEXT: vadd.vv v16, v12, v12
+; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: vadd.vi v16, v16, 1
+; CHECK-NEXT: vrgather.vv v20, v8, v16
+; CHECK-NEXT: vmv2r.v v8, v12
+; CHECK-NEXT: vmv2r.v v10, v20
+; CHECK-NEXT: ret
+%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
+ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
+}
+
+declare {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
+declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
+declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
+declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
+declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
+declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
+
+define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f16_nxv64f16(<vscale x 64 x half> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT:
csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma +; CHECK-NEXT: vnsrl.wi v12, v16, 0 +; CHECK-NEXT: vnsrl.wi v8, v24, 0 +; CHECK-NEXT: vnsrl.wi v4, v16, 16 +; CHECK-NEXT: vnsrl.wi v0, v24, 16 +; CHECK-NEXT: vmv8r.v v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+%retval = call {<vscale x 32 x half>, <vscale x 32 x half>} @llvm.experimental.vector.deinterleave2.nxv64f16(<vscale x 64 x half> %vec)
+ret {<vscale x 32 x half>, <vscale x 32 x half>} %retval
+}
+
+define {<vscale x 16 x float>, <vscale x 16 x float>} @vector_deinterleave_nxv16f32_nxv32f32(<vscale x 32 x float> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv16f32_nxv32f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmv8r.v v24, v16 +; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-NEXT: vnsrl.wx v20, v24, a0 +; CHECK-NEXT: vnsrl.wx v16, v8, a0 +; CHECK-NEXT: vnsrl.wi v4, v24, 0 +; CHECK-NEXT: vnsrl.wi v0, v8, 0 +; CHECK-NEXT: vmv8r.v v8, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; 
CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+%retval = call {<vscale x 16 x float>, <vscale x 16 x float>} @llvm.experimental.vector.deinterleave2.nxv32f32(<vscale x 32 x float> %vec)
+ret {<vscale x 16 x float>, <vscale x 16 x float>} %retval
+}
+
+define {<vscale x 8 x double>, <vscale x 8 x double>} @vector_deinterleave_nxv8f64_nxv16f64(<vscale x 16 x double> %vec) {
+; CHECK-LABEL: vector_deinterleave_nxv8f64_nxv16f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 56
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 54
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 53
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 52
; CHECK-NEXT: mul a0, a0, a1
-; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: li a1, 51
;
CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv8r.v v24, v8 +; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vid.v v8 -; CHECK-NEXT: vadd.vv v0, v8, v8 -; CHECK-NEXT: vrgather.vv v8, v24, v0 -; CHECK-NEXT: vrgather.vv v24, v16, v0 +; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 49 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vadd.vi v16, v0, 1 +; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: li a1, 47 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v0, v24, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: li a1, 46 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: li a1, 45 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vrgather.vv v16, v24, v0 -; CHECK-NEXT: vmv4r.v v24, v16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: li a1, 44 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v12, v16 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v20, v24 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: li a1, 43 ; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: ret -%retval = call {, } @llvm.experimental.vector.deinterleave2.nxv16i64( %vec) -ret {, } %retval -} - -declare {, } @llvm.experimental.vector.deinterleave2.nxv128i1() -declare {, } @llvm.experimental.vector.deinterleave2.nxv128i8() -declare {, } @llvm.experimental.vector.deinterleave2.nxv64i16() -declare {, } @llvm.experimental.vector.deinterleave2.nxv32i32() -declare {, } @llvm.experimental.vector.deinterleave2.nxv16i64() - -; Floats - -define {, } @vector_deinterleave_nxv2f16_nxv4f16( %vec) { -; 
CHECK-LABEL: vector_deinterleave_nxv2f16_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
-; CHECK-NEXT: vnsrl.wi v9, v8, 16
-; CHECK-NEXT: vmv1r.v v8, v10
-; CHECK-NEXT: ret
-%retval = call {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half> %vec)
-ret {<vscale x 2 x half>, <vscale x 2 x half>} %retval
-}
-
-define {<vscale x 4 x half>, <vscale x 4 x half>} @vector_deinterleave_nxv4f16_nxv8f16(<vscale x 8 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4f16_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v10, v8, 0
-; CHECK-NEXT: vnsrl.wi v11, v8, 16
-; CHECK-NEXT: vmv.v.v v8, v10
-; CHECK-NEXT: vmv.v.v v9, v11
-; CHECK-NEXT: ret
-%retval = call {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half> %vec)
-ret {<vscale x 4 x half>, <vscale x 4 x half>} %retval
-}
-
-define {<vscale x 2 x float>, <vscale x 2 x float>} @vector_deinterleave_nxv2f32_nxv4f32(<vscale x 4 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2f32_nxv4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wx v10, v8, a0
-; CHECK-NEXT: vnsrl.wi v11, v8, 0
-; CHECK-NEXT: vmv.v.v v8, v11
-; CHECK-NEXT: vmv.v.v v9, v10
-; CHECK-NEXT: ret
-%retval = call {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float> %vec)
-ret {<vscale x 2 x float>, <vscale x 2 x float>} %retval
-}
-
-define {<vscale x 8 x half>, <vscale x 8 x half>} @vector_deinterleave_nxv8f16_nxv16f16(<vscale x 16 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv8f16_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v8, 0
-; CHECK-NEXT: vnsrl.wi v14, v8, 16
-; CHECK-NEXT: vmv.v.v v8, v12
-; CHECK-NEXT: vmv.v.v v10, v14
-; CHECK-NEXT: ret
-%retval = call {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %vec)
-ret {<vscale x 8 x half>, <vscale x 8 x half>} %retval
-}
-
-define {<vscale x 4 x float>, <vscale x 4 x float>} @vector_deinterleave_nxv4f32_nxv8f32(<vscale x 8 x float> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv4f32_nxv8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: li a0, 32
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wx v12, v8, a0
-; CHECK-NEXT: vnsrl.wi v14, v8, 0
-; CHECK-NEXT: vmv.v.v v8, v14
-; CHECK-NEXT: vmv.v.v v10, v12
-; CHECK-NEXT: ret
-%retval = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %vec)
-ret {<vscale x 4 x float>, <vscale x 4 x float>} %retval
-}
-
-define {<vscale x 2 x double>, <vscale x 2 x double>} @vector_deinterleave_nxv2f64_nxv4f64(<vscale x 4 x double> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv2f64_nxv4f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vadd.vv v16, v12, v12
-; CHECK-NEXT: vrgather.vv v12, v8, v16
-; CHECK-NEXT: vadd.vi v16, v16, 1
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmv2r.v v8, v12
-; CHECK-NEXT: vmv2r.v v10, v20
-; CHECK-NEXT: ret
-%retval = call {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %vec)
-ret {<vscale x 2 x double>, <vscale x 2 x double>} %retval
-}
-
-declare {<vscale x 2 x half>, <vscale x 2 x half>} @llvm.experimental.vector.deinterleave2.nxv4f16(<vscale x 4 x half>)
-declare {<vscale x 4 x half>, <vscale x 4 x half>} @llvm.experimental.vector.deinterleave2.nxv8f16(<vscale x 8 x half>)
-declare {<vscale x 2 x float>, <vscale x 2 x float>} @llvm.experimental.vector.deinterleave2.nxv4f32(<vscale x 4 x float>)
-declare {<vscale x 8 x half>, <vscale x 8 x half>} @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half>)
-declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float>)
-declare {<vscale x 2 x double>, <vscale x 2 x double>} @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double>)
-
-define {<vscale x 32 x half>, <vscale x 32 x half>} @vector_deinterleave_nxv32f16_nxv64f16(<vscale x 64 x half> %vec) {
-; CHECK-LABEL: vector_deinterleave_nxv32f16_nxv64f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmv8r.v v24, v8
-; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v12, v16, 0
-; CHECK-NEXT: vnsrl.wi v8, v24, 0
-; CHECK-NEXT: vnsrl.wi v4, v16, 16
-; CHECK-NEXT: vnsrl.wi v0, v24, 16
-; CHECK-NEXT: vmv8r.v v16, v0
-; CHECK-NEXT: ret
-%retval = call {<vscale x 32 x half>, <vscale x 32 x half>} @llvm.experimental.vector.deinterleave2.nxv64f16(<vscale x 64 x half> %vec)
-ret {<vscale x 32 x half>, <vscale x 32 x half>} %retval
-}
-
-define {, } @vector_deinterleave_nxv16f32_nxv32f32( %vec) { -; CHECK-LABEL: vector_deinterleave_nxv16f32_nxv32f32: -; CHECK: # %bb.0: -; CHECK-NEXT: vmv8r.v v24, v16 -; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma -; CHECK-NEXT: vnsrl.wx v20, v24, a0 -; CHECK-NEXT: vnsrl.wx v16, v8, a0 -; CHECK-NEXT: vnsrl.wi v4, v24, 0 -; CHECK-NEXT: vnsrl.wi v0, v8, 0 -; CHECK-NEXT: vmv8r.v v8, v0 -; CHECK-NEXT: ret -%retval = call {, } @llvm.experimental.vector.deinterleave2.nxv32f32( %vec) -ret {, } %retval -} - -define {, } @vector_deinterleave_nxv8f64_nxv16f64( %vec) { -; CHECK-LABEL: vector_deinterleave_nxv8f64_nxv16f64: -; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 41 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 @@ -450,8 +1750,98 @@ ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv4r.v v20, v24 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 54 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 53 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 52 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 51 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 49 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 47 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 46 +; CHECK-NEXT: 
mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 45 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 44 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 43 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 41 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 56 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-store.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s -; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s +; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s +; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s ; Integers @@ -98,9 +98,69 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 26 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 25 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 23 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 22 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li 
a2, 21 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 20 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -138,7 +198,67 @@ ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vs8r.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh | FileCheck %s
; Integers
@@ -96,6 +96,52 @@
define @vector_interleave_nxv128i1_nxv64i1( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv128i1_nxv64i1:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
@@ -114,6 +160,50 @@
; CHECK-NEXT: vwmaccu.vx v24, a0, v20
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmsne.vi v8, v24, 0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.interleave2.nxv128i1( %a, %b)
ret %res
@@ -122,6 +212,93 @@
define @vector_interleave_nxv128i8_nxv64i8( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv128i8_nxv64i8:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e8, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v8, v24, v16
@@ -130,6 +307,91 @@
; CHECK-NEXT: vwaddu.vv v0, v28, v20
; CHECK-NEXT: vwmaccu.vx v0, a0, v20
; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.interleave2.nxv128i8( %a, %b)
ret %res
@@ -138,6 +400,93 @@
define @vector_interleave_nxv64i16_nxv32i16( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv64i16_nxv32i16:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v8, v24, v16
@@ -146,6 +495,91 @@
; CHECK-NEXT: vwaddu.vv v0, v28, v20
; CHECK-NEXT: vwmaccu.vx v0, a0, v20
; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.interleave2.nxv64i16( %a, %b)
ret %res
@@ -154,6 +588,93 @@
define @vector_interleave_nxv32i32_nxv16i32( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv32i32_nxv16i32:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v8, v24, v16
@@ -162,6 +683,91 @@
; CHECK-NEXT: vwaddu.vv v0, v28, v20
; CHECK-NEXT: vwmaccu.vx v0, a0, v20
; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.interleave2.nxv32i32( %a, %b)
ret %res
@@ -173,9 +779,98 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
@@ -198,7 +893,96 @@
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -316,6 +1100,93 @@
define @vector_interleave_nxv64f16_nxv32f16( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv64f16_nxv32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v8, v24, v16
@@ -324,6 +1195,91 @@
; CHECK-NEXT: vwaddu.vv v0, v28, v20
; CHECK-NEXT: vwmaccu.vx v0, a0, v20
; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.interleave2.nxv64f16( %a, %b)
ret %res
@@ -332,6 +1288,93 @@
define @vector_interleave_nxv32f32_nxv16f32( %a, %b) {
; CHECK-LABEL: vector_interleave_nxv32f32_nxv16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v24, v8
; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-NEXT: vwaddu.vv v8, v24, v16
@@ -340,6 +1383,91 @@
; CHECK-NEXT: vwaddu.vv v0, v28, v20
; CHECK-NEXT: vwmaccu.vx v0, a0, v20
; CHECK-NEXT: vmv8r.v v16, v0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.interleave2.nxv32f32( %a, %b)
ret %res
@@ -351,9 +1479,98 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v2, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v3, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v4, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v5, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v6, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv8r.v v0, v8
; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu
; CHECK-NEXT: vid.v v24
@@ -376,7 +1593,96 @@
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vmv.v.v v16, v24
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 22
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 21
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 20
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-splice.ll
@@ -298,6 +298,52 @@
define @splice_nxv64i1_offset_negone( %a, %b) #0 {
; CHECK-LABEL: splice_nxv64i1_offset_negone:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v9, v0
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v24, 0
@@ -314,6 +360,50 @@
; CHECK-NEXT: vslideup.vi v8, v16, 1
; CHECK-NEXT: vand.vi v8, v8, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.splice.nxv64i1( %a, %b, i32 -1)
ret %res
@@ -322,6 +412,52 @@
define @splice_nxv64i1_offset_max( %a, %b) #0 {
; CHECK-LABEL: splice_nxv64i1_offset_max:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
; CHECK-NEXT: vmv.v.i v16, 0
; CHECK-NEXT: vmerge.vim v24, v16, 1, v0
@@ -337,6 +473,50 @@
; CHECK-NEXT: vslideup.vx v24, v8, a0
; CHECK-NEXT: vand.vi v8, v24, 1
; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%res = call @llvm.experimental.vector.splice.nxv64i1( %a, %b, i32 127)
ret %res
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfabs-vp.ll
@@ -394,6 +394,14 @@
define @vfabs_vv_nxv16f64( %va, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfabs_vv_nxv16f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: srli a2, a1, 3
@@ -412,6 +420,12 @@
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; CHECK-NEXT: vmv1r.v v0, v24
; CHECK-NEXT: vfabs.v v8, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call @llvm.vp.fabs.nxv16f64( %va, %m, i32 %evl)
ret %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfadd.ll b/llvm/test/CodeGen/RISCV/rvv/vfadd.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfadd.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfadd.ll
@@ -1,11 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfhmin,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfhmin,+zvfh \
; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfhmin,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfhmin,+zvfh \
; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
declare @llvm.riscv.vfadd.nxv1f16.nxv1f16( ,
@@ -291,11 +291,99 @@
define @intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv32f16_nxv32f16_nxv32f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%a = call @llvm.riscv.vfadd.mask.nxv32f16.nxv32f16(
@@ -542,11 +630,99 @@
define @intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv16f32_nxv16f32_nxv16f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%a = call @llvm.riscv.vfadd.mask.nxv16f32.nxv16f32(
@@ -743,11 +919,99 @@
define @intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vfadd_mask_vv_nxv8f64_nxv8f64_nxv8f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfadd.vv v8, v16, v24, v0.t
; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+;
CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfadd.mask.nxv8f64.nxv8f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcmp-constrained-sdnode.ll @@ -3339,13 +3339,25 @@ define @fcmp_ogt_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ogt_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret %1 @@ -3356,9 +3368,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3374,9 +3386,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: 
vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3390,13 +3402,25 @@ define @fcmp_oge_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_oge_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret %1 @@ -3407,9 +3431,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3425,9 +3449,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3441,13 +3465,25 @@ define @fcmp_olt_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_olt_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret %1 @@ -3458,9 +3494,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3476,9 +3512,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, 
m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3492,13 +3528,25 @@ define @fcmp_ole_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ole_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret %1 @@ -3509,9 +3557,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3527,9 +3575,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -3543,16 +3591,36 @@ define @fcmp_one_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_one_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmfeq.vv v26, v8, v8 -; CHECK-NEXT: vmand.mm v24, v26, v25 -; CHECK-NEXT: vmand.mm v25, v25, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v25, v8, v8 +; CHECK-NEXT: vmand.mm v24, v25, v0 +; CHECK-NEXT: vmand.mm v25, v0, v25 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -3563,10 +3631,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v24 -; CHECK-NEXT: vmand.mm v17, v24, v17 +; CHECK-NEXT: vmand.mm v16, v17, v0 +; CHECK-NEXT: vmand.mm v17, v0, v17 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -3584,10 +3652,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v17 -; CHECK-NEXT: vmand.mm v17, v17, v24 +; CHECK-NEXT: vmand.mm v16, v0, v17 +; CHECK-NEXT: vmand.mm v17, v17, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -3604,9 +3672,9 @@ ; CHECK-LABEL: fcmp_ord_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret %1 @@ -3617,9 +3685,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -3632,9 +3700,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -3645,16 +3713,36 @@ define @fcmp_ueq_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ueq_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmfeq.vv v26, v8, v8 -; CHECK-NEXT: vmand.mm v24, v26, v25 -; CHECK-NEXT: vmand.mm v25, v25, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v25, v8, v8 +; CHECK-NEXT: vmand.mm v24, v25, v0 +; CHECK-NEXT: vmand.mm v25, v0, v25 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -3665,10 +3753,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v24 -; CHECK-NEXT: vmand.mm v17, v24, v17 +; CHECK-NEXT: vmand.mm v16, v17, v0 +; CHECK-NEXT: vmand.mm v17, v0, v17 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -3686,10 +3774,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v17 -; CHECK-NEXT: vmand.mm v17, v17, v24 +; CHECK-NEXT: vmand.mm v16, v0, v17 +; CHECK-NEXT: vmand.mm v17, v17, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -3705,13 +3793,25 @@ define @fcmp_ugt_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ugt_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret %1 @@ -3722,9 +3822,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3740,9 +3840,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3756,13 +3856,25 @@ define @fcmp_uge_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_uge_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret %1 @@ -3773,9 +3885,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3791,9 +3903,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3807,13 +3919,25 @@ define @fcmp_ult_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ult_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret %1 @@ -3824,9 +3948,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3842,9 +3966,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: 
vmnot.m v0, v16 @@ -3858,13 +3982,25 @@ define @fcmp_ule_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ule_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret %1 @@ -3875,9 +4011,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3893,9 +4029,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -3944,9 +4080,9 @@ ; CHECK-LABEL: fcmp_uno_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 +; CHECK-NEXT: vmfne.vv v0, v16, v16 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv32f16( %va, %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret %1 @@ -3957,9 +4093,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -3972,9 +4108,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6672,13 +6808,25 @@ define @fcmp_ogt_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ogt_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; 
CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret %1 @@ -6689,9 +6837,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6707,9 +6855,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6723,13 +6871,25 @@ define @fcmp_oge_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_oge_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret %1 @@ -6740,9 +6900,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6758,9 +6918,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6774,13 +6934,25 @@ define @fcmp_olt_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: 
fcmp_olt_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret %1 @@ -6791,9 +6963,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6809,9 +6981,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6825,13 +6997,25 @@ define @fcmp_ole_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ole_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret %1 @@ -6842,9 +7026,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6860,9 +7044,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, 
v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -6876,16 +7060,36 @@ define @fcmp_one_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_one_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmfeq.vv v26, v8, v8 -; CHECK-NEXT: vmand.mm v24, v26, v25 -; CHECK-NEXT: vmand.mm v25, v25, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v25, v8, v8 +; CHECK-NEXT: vmand.mm v24, v25, v0 +; CHECK-NEXT: vmand.mm v25, v0, v25 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -6896,10 +7100,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v24 -; CHECK-NEXT: vmand.mm v17, v24, v17 +; CHECK-NEXT: vmand.mm v16, v17, v0 +; CHECK-NEXT: vmand.mm v17, v0, v17 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -6917,10 +7121,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v17 -; CHECK-NEXT: vmand.mm v17, v17, v24 +; CHECK-NEXT: vmand.mm v16, v0, v17 +; CHECK-NEXT: vmand.mm v17, v17, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -6937,9 +7141,9 @@ ; CHECK-LABEL: fcmp_ord_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret %1 @@ -6950,9 +7154,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6965,9 +7169,9 @@ ; CHECK: 
# %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -6978,16 +7182,36 @@ define @fcmp_ueq_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ueq_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmfeq.vv v26, v8, v8 -; CHECK-NEXT: vmand.mm v24, v26, v25 -; CHECK-NEXT: vmand.mm v25, v25, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v25, v8, v8 +; CHECK-NEXT: vmand.mm v24, v25, v0 +; CHECK-NEXT: vmand.mm v25, v0, v25 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -6998,10 +7222,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v24 -; CHECK-NEXT: vmand.mm v17, v24, v17 +; CHECK-NEXT: vmand.mm v16, v17, v0 +; CHECK-NEXT: vmand.mm v17, v0, v17 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -7019,10 +7243,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v17 -; CHECK-NEXT: vmand.mm v17, v17, v24 +; CHECK-NEXT: vmand.mm v16, v0, v17 +; CHECK-NEXT: vmand.mm v17, v17, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -7038,13 +7262,25 @@ define @fcmp_ugt_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ugt_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; 
CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret %1 @@ -7055,9 +7291,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7073,9 +7309,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7089,13 +7325,25 @@ define @fcmp_uge_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_uge_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret %1 @@ -7106,9 +7354,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7124,9 +7372,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7140,13 +7388,25 @@ define @fcmp_ult_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ult_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: 
vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret %1 @@ -7157,9 +7417,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7175,9 +7435,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7191,13 +7451,25 @@ define @fcmp_ule_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ule_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret %1 @@ -7208,9 +7480,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7226,9 +7498,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -7277,9 +7549,9 @@ ; CHECK-LABEL: fcmp_uno_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 +; 
CHECK-NEXT: vmfne.vv v0, v16, v16 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv16f32( %va, %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret %1 @@ -7290,9 +7562,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -7305,9 +7577,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9360,13 +9632,25 @@ define @fcmp_ogt_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ogt_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ogt", metadata !"fpexcept.strict") strictfp ret %1 @@ -9377,9 +9661,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9395,9 +9679,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9411,13 +9695,25 @@ define @fcmp_oge_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_oge_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv 
v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"oge", metadata !"fpexcept.strict") strictfp ret %1 @@ -9428,9 +9724,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9446,9 +9742,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9462,13 +9758,25 @@ define @fcmp_olt_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_olt_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"olt", metadata !"fpexcept.strict") strictfp ret %1 @@ -9479,9 +9787,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9497,9 +9805,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9513,13 +9821,25 @@ define @fcmp_ole_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ole_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ole", metadata !"fpexcept.strict") strictfp ret %1 @@ -9530,9 +9850,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9548,9 +9868,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 @@ -9564,16 +9884,36 @@ define @fcmp_one_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_one_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmfeq.vv v26, v8, v8 -; CHECK-NEXT: vmand.mm v24, v26, v25 -; CHECK-NEXT: vmand.mm v25, v25, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v25, v8, v8 +; CHECK-NEXT: vmand.mm v24, v25, v0 +; CHECK-NEXT: vmand.mm v25, v0, v25 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -9584,10 +9924,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v24 -; CHECK-NEXT: vmand.mm v17, v24, v17 +; CHECK-NEXT: vmand.mm v16, v17, v0 +; CHECK-NEXT: vmand.mm v17, v0, v17 ; CHECK-NEXT: vmv1r.v v0, v16 ; 
CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -9605,10 +9945,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v17 -; CHECK-NEXT: vmand.mm v17, v17, v24 +; CHECK-NEXT: vmand.mm v16, v0, v17 +; CHECK-NEXT: vmand.mm v17, v17, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -9625,9 +9965,9 @@ ; CHECK-LABEL: fcmp_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret %1 @@ -9638,9 +9978,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9653,9 +9993,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9666,16 +10006,36 @@ define @fcmp_ueq_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ueq_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmfeq.vv v26, v8, v8 -; CHECK-NEXT: vmand.mm v24, v26, v25 -; CHECK-NEXT: vmand.mm v25, v25, v26 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v25, v8, v8 +; CHECK-NEXT: vmand.mm v24, v25, v0 +; CHECK-NEXT: vmand.mm v25, v0, v25 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vmflt.vv v25, v16, v8, v0.t ; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -9686,10 +10046,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; 
CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v17, v24 -; CHECK-NEXT: vmand.mm v17, v24, v17 +; CHECK-NEXT: vmand.mm v16, v17, v0 +; CHECK-NEXT: vmand.mm v17, v0, v17 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -9707,10 +10067,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v17, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v17 -; CHECK-NEXT: vmand.mm v17, v17, v24 +; CHECK-NEXT: vmand.mm v16, v0, v17 +; CHECK-NEXT: vmand.mm v17, v17, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -9726,13 +10086,25 @@ define @fcmp_ugt_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ugt_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret %1 @@ -9743,9 +10115,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9761,9 +10133,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9777,13 +10149,25 @@ define @fcmp_uge_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_uge_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v16, v16 -; CHECK-NEXT: vmfeq.vv v25, v8, v8 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v24, v8, v8 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret %1 @@ -9794,9 +10178,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9812,9 +10196,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9828,13 +10212,25 @@ define @fcmp_ult_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ult_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmfle.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret %1 @@ -9845,9 +10241,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfge.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9863,9 +10259,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfle.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9879,13 +10275,25 @@ define @fcmp_ule_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmp_ule_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu -; CHECK-NEXT: vmfeq.vv v24, v8, v8 -; CHECK-NEXT: vmfeq.vv v25, v16, v16 -; CHECK-NEXT: vmand.mm v24, v25, v24 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v24, v16, v16 +; CHECK-NEXT: vmand.mm 
v24, v24, v0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmflt.vv v24, v16, v8, v0.t ; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret %1 @@ -9896,9 +10304,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v24, v16 +; CHECK-NEXT: vmand.mm v16, v0, v16 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmfgt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9914,9 +10322,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfeq.vf v24, v16, fa0 +; CHECK-NEXT: vmfeq.vf v0, v16, fa0 ; CHECK-NEXT: vmfeq.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v16, v16, v24 +; CHECK-NEXT: vmand.mm v16, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmflt.vf v16, v8, fa0, v0.t ; CHECK-NEXT: vmnot.m v0, v16 @@ -9965,9 +10373,9 @@ ; CHECK-LABEL: fcmp_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfne.vv v24, v16, v16 +; CHECK-NEXT: vmfne.vv v0, v16, v16 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmp.nxv8f64( %va, %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret %1 @@ -9978,9 +10386,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v16, v24 +; CHECK-NEXT: vmor.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -9993,9 +10401,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfne.vf v24, v16, fa0 +; CHECK-NEXT: vmfne.vf v0, v16, fa0 ; CHECK-NEXT: vmfne.vv v16, v8, v8 -; CHECK-NEXT: vmor.mm v0, v24, v16 +; CHECK-NEXT: vmor.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcmps-constrained-sdnode.ll @@ -2668,10 +2668,22 @@ define @fcmps_oeq_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_oeq_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v24, v8, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret %1 @@ -2844,10 +2856,22 @@ define @fcmps_one_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_one_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v24, v16, v8 +; CHECK-NEXT: vmor.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -2885,9 +2909,9 @@ ; CHECK-LABEL: fcmps_ord_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 +; CHECK-NEXT: vmfle.vv v0, v16, v16 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret %1 @@ -2898,9 +2922,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -2913,9 +2937,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -2926,10 +2950,22 @@ define @fcmps_ueq_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_ueq_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v24, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -2967,8 +3003,8 @@ ; 
CHECK-LABEL: fcmps_ugt_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret %1 @@ -3004,8 +3040,8 @@ ; CHECK-LABEL: fcmps_uge_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret %1 @@ -3041,8 +3077,8 @@ ; CHECK-LABEL: fcmps_ult_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret %1 @@ -3078,8 +3114,8 @@ ; CHECK-LABEL: fcmps_ule_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret %1 @@ -3114,10 +3150,22 @@ define @fcmps_une_vv_nxv32f16( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_une_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v24, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret %1 @@ -3155,10 +3203,10 @@ ; CHECK-LABEL: fcmps_uno_vv_nxv32f16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 +; CHECK-NEXT: vmfle.vv v0, v16, v16 ; CHECK-NEXT: vmfle.vv v16, v8, v8 ; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv32f16( %va, %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret %1 @@ -3169,10 +3217,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 ; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement poison, half %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -3185,8 +3233,8 @@ ; CHECK: 
# %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 ; CHECK-NEXT: vmfle.vv v17, v8, v8 ; CHECK-NEXT: vmorn.mm v0, v16, v17 ; CHECK-NEXT: ret @@ -5328,10 +5376,22 @@ define @fcmps_oeq_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_oeq_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v24, v8, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret %1 @@ -5504,10 +5564,22 @@ define @fcmps_one_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_one_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v24, v16, v8 +; CHECK-NEXT: vmor.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -5545,9 +5617,9 @@ ; CHECK-LABEL: fcmps_ord_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 +; CHECK-NEXT: vmfle.vv v0, v16, v16 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret %1 @@ -5558,9 +5630,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -5573,9 +5645,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, 
poison, zeroinitializer @@ -5586,10 +5658,22 @@ define @fcmps_ueq_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_ueq_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v24, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -5627,8 +5711,8 @@ ; CHECK-LABEL: fcmps_ugt_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret %1 @@ -5664,8 +5748,8 @@ ; CHECK-LABEL: fcmps_uge_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret %1 @@ -5701,8 +5785,8 @@ ; CHECK-LABEL: fcmps_ult_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret %1 @@ -5738,8 +5822,8 @@ ; CHECK-LABEL: fcmps_ule_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret %1 @@ -5774,10 +5858,22 @@ define @fcmps_une_vv_nxv16f32( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_une_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v24, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, 
%vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret %1 @@ -5815,10 +5911,10 @@ ; CHECK-LABEL: fcmps_uno_vv_nxv16f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 +; CHECK-NEXT: vmfle.vv v0, v16, v16 ; CHECK-NEXT: vmfle.vv v16, v8, v8 ; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv16f32( %va, %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret %1 @@ -5829,10 +5925,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 ; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement poison, float %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -5845,8 +5941,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 ; CHECK-NEXT: vmfle.vv v17, v8, v8 ; CHECK-NEXT: vmorn.mm v0, v16, v17 ; CHECK-NEXT: ret @@ -7456,10 +7552,22 @@ define @fcmps_oeq_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_oeq_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v24, v8, v16 +; CHECK-NEXT: vmand.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"oeq", metadata !"fpexcept.strict") strictfp ret %1 @@ -7632,10 +7740,22 @@ define @fcmps_one_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_one_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v24, v16, v8 +; CHECK-NEXT: vmor.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"one", metadata !"fpexcept.strict") strictfp ret %1 @@ -7673,9 +7793,9 @@ ; CHECK-LABEL: fcmps_ord_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 +; CHECK-NEXT: vmfle.vv v0, v16, v16 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; 
CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"ord", metadata !"fpexcept.strict") strictfp ret %1 @@ -7686,9 +7806,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v16, v24 +; CHECK-NEXT: vmand.mm v0, v16, v0 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -7701,9 +7821,9 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 -; CHECK-NEXT: vmand.mm v0, v24, v16 +; CHECK-NEXT: vmand.mm v0, v0, v16 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -7714,10 +7834,22 @@ define @fcmps_ueq_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_ueq_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmflt.vv v25, v16, v8 -; CHECK-NEXT: vmnor.mm v0, v25, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmflt.vv v24, v16, v8 +; CHECK-NEXT: vmnor.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"ueq", metadata !"fpexcept.strict") strictfp ret %1 @@ -7755,8 +7887,8 @@ ; CHECK-LABEL: fcmps_ugt_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"ugt", metadata !"fpexcept.strict") strictfp ret %1 @@ -7792,8 +7924,8 @@ ; CHECK-LABEL: fcmps_uge_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v8, v16 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v8, v16 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"uge", metadata !"fpexcept.strict") strictfp ret %1 @@ -7829,8 +7961,8 @@ ; CHECK-LABEL: fcmps_ult_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"ult", metadata !"fpexcept.strict") strictfp ret %1 @@ -7866,8 +7998,8 @@ ; CHECK-LABEL: fcmps_ule_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmflt.vv v24, v16, v8 -; CHECK-NEXT: vmnot.m v0, v24 +; CHECK-NEXT: vmflt.vv v0, v16, v8 +; CHECK-NEXT: vmnot.m v0, v0 ; CHECK-NEXT: ret %1 = call 
@llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"ule", metadata !"fpexcept.strict") strictfp ret %1 @@ -7902,10 +8034,22 @@ define @fcmps_une_vv_nxv8f64( %va, %vb) nounwind strictfp { ; CHECK-LABEL: fcmps_une_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v8 -; CHECK-NEXT: vmfle.vv v25, v8, v16 -; CHECK-NEXT: vmnand.mm v0, v25, v24 +; CHECK-NEXT: vmfle.vv v0, v16, v8 +; CHECK-NEXT: vmfle.vv v24, v8, v16 +; CHECK-NEXT: vmnand.mm v0, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"une", metadata !"fpexcept.strict") strictfp ret %1 @@ -7943,10 +8087,10 @@ ; CHECK-LABEL: fcmps_uno_vv_nxv8f64: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; CHECK-NEXT: vmfle.vv v24, v16, v16 +; CHECK-NEXT: vmfle.vv v0, v16, v16 ; CHECK-NEXT: vmfle.vv v16, v8, v8 ; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %1 = call @llvm.experimental.constrained.fcmps.nxv8f64( %va, %vb, metadata !"uno", metadata !"fpexcept.strict") strictfp ret %1 @@ -7957,10 +8101,10 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 ; CHECK-NEXT: vmfle.vv v16, v8, v8 ; CHECK-NEXT: vmnot.m v8, v16 -; CHECK-NEXT: vmorn.mm v0, v8, v24 +; CHECK-NEXT: vmorn.mm v0, v8, v0 ; CHECK-NEXT: ret %head = insertelement poison, double %b, i32 0 %splat = shufflevector %head, poison, zeroinitializer @@ -7973,8 +8117,8 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v16, fa0 -; CHECK-NEXT: vmfle.vf v24, v16, fa0 -; CHECK-NEXT: vmnot.m v16, v24 +; CHECK-NEXT: vmfle.vf v0, v16, fa0 +; CHECK-NEXT: vmnot.m v16, v0 ; CHECK-NEXT: vmfle.vv v17, v8, v8 ; CHECK-NEXT: vmorn.mm v0, v16, v17 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfcopysign-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.copysign.nxv1f16(, ) @@ -1319,12 +1319,102 @@ define @vfcopysign_exttrunc_vv_nxv8f64_nxv8f16( %vm, %vs) { ; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f64_nxv8f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 
0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v20, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v20 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %e = fpext %vs to %r = call @llvm.copysign.nxv8f64( %vm, %e) @@ -1348,12 +1438,102 @@ define @vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16( %vm, %vs) { ; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f64_nxv8f16: ; CHECK: # %bb.0: +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v20, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v20 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfsgnjn.vv v8, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi 
sp, sp, 16 ; CHECK-NEXT: ret %n = fneg %vs %eneg = fpext %n to @@ -1379,10 +1559,100 @@ define @vfcopysign_exttrunc_vv_nxv8f64_nxv8f32( %vm, %vs) { ; CHECK-LABEL: vfcopysign_exttrunc_vv_nxv8f64_nxv8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfsgnj.vv v8, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %e = fpext %vs to %r = call @llvm.copysign.nxv8f64( %vm, %e) @@ -1406,10 +1676,100 @@ define @vfcopynsign_exttrunc_vv_nxv8f64_nxv8f32( %vm, %vs) { ; CHECK-LABEL: vfcopynsign_exttrunc_vv_nxv8f64_nxv8f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v24, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfsgnjn.vv v8, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %n = fneg %vs %eneg = fpext %n to diff --git a/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll b/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfdiv.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfdiv.nxv1f16.nxv1f16( , @@ -287,11 +287,99 @@ define @intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfdiv.vv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; 
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%a = call @llvm.riscv.vfdiv.mask.nxv32f16.nxv32f16(
@@ -538,11 +626,99 @@
define @intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv16f32_nxv16f32_nxv16f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfdiv.vv v8, v16, v24, v0.t
; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%a = call @llvm.riscv.vfdiv.mask.nxv16f32.nxv16f32(
@@ -739,11 +915,99 @@
define @intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vfdiv_mask_vv_nxv8f64_nxv8f64_nxv8f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: fsrmi a0, 0
; CHECK-NEXT: vfdiv.vv v8, v16, v24, v0.t
; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%a = call @llvm.riscv.vfdiv.mask.nxv8f64.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll
@@ -394,10 +394,100 @@
define @vfma_vv_nxv32f16( %va, %b, %c, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call @llvm.vp.fma.nxv32f16( %va, %b, %c, %m, i32 %evl)
ret %v
@@ -406,9 +496,99 @@
define @vfma_vv_nxv32f16_unmasked( %va, %b, %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv32f16_unmasked:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement poison, i1 true, i32 0
%m = shufflevector %head, poison, zeroinitializer
@@ -781,10 +961,100 @@
define @vfma_vv_nxv16f32( %va, %b, %c, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv16f32:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call @llvm.vp.fma.nxv16f32( %va, %b, %c, %m, i32 %evl)
ret %v
@@ -793,9 +1063,99 @@
define @vfma_vv_nxv16f32_unmasked( %va, %b, %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv16f32_unmasked:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement poison, i1 true, i32 0
%m = shufflevector %head, poison, zeroinitializer
@@ -1091,10 +1451,100 @@
define @vfma_vv_nxv7f64( %va, %b, %c, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv7f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call @llvm.vp.fma.nxv7f64( %va, %b, %c, %m, i32 %evl)
ret %v
@@ -1103,9 +1553,99 @@
define @vfma_vv_nxv7f64_unmasked( %va, %b, %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv7f64_unmasked:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement poison, i1 true, i32 0
%m = shufflevector %head, poison, zeroinitializer
@@ -1118,10 +1658,100 @@
define @vfma_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv8f64:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%v = call @llvm.vp.fma.nxv8f64( %va, %b, %c, %m, i32 %evl)
ret %v
@@ -1130,9 +1760,99 @@
define @vfma_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) {
; CHECK-LABEL: vfma_vv_nxv8f64_unmasked:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
; CHECK-NEXT: vfmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%head = insertelement poison, i1 true, i32 0
%m = shufflevector %head, poison, zeroinitializer
@@ -1200,29 +1920,83 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: li a3, 50
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
-; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 48
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: li a3, 47
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
-; CHECK-NEXT: slli a3, a1, 3
-; CHECK-NEXT: add a5, a2, a3
-; CHECK-NEXT: vl8re64.v v8, (a5)
-; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 24
-; CHECK-NEXT: mul a5, a5, a6
+; CHECK-NEXT: li a3, 46
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 45
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 44
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 43
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 42
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 41
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma
+; CHECK-NEXT: slli a3, a1, 3
+; CHECK-NEXT: add a5, a2, a3
+; CHECK-NEXT: vl8re64.v v8, (a5)
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: li a6, 24
+; CHECK-NEXT: mul a5, a5, a6
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
@@ -1288,8 +2062,62 @@
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 48
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 47
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 46
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 45
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 44
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 43
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 42
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 41
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 50
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -1303,10 +2131,99 @@
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: li a3, 40
; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 38
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 37
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 36
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 35
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 34
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 5
+; CHECK-NEXT: add a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a3, a1, 5
+; CHECK-NEXT: sub a1, a3, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 30
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 29
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 28
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 27
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 26
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 25
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
@@ -1353,8 +2270,97 @@
; CHECK-NEXT: vfmadd.vv v0, v24, v8
; CHECK-NEXT: vmv.v.v v8, v0
; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 38
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 37
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 36
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 35
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 34
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 5
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 30
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 29
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 28
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 27
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: li a1, 24
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
@@ -3474,10 +4480,100 @@
define @vfmsub_vv_nxv32f16( %va, %b, %c, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfmsub_vv_nxv32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfmsub.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
%negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
%v = call @llvm.vp.fma.nxv32f16( %va, %b, %negc, %m, i32 %evl)
@@ -3487,28 +4583,118 @@
define @vfmsub_vv_nxv32f16_unmasked( %va, %b, %c, i32 zeroext %evl) {
; CHECK-LABEL: vfmsub_vv_nxv32f16_unmasked:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfmsub.vv v8, v16, v24
-; CHECK-NEXT: ret
- %head = insertelement poison, i1 true, i32 0
- %m = shufflevector %head, poison, zeroinitializer
- %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
- %v = call @llvm.vp.fma.nxv32f16( %va, %b, %negc, %m, i32 %evl)
- ret %v
-}
-define @vfmsub_vf_nxv32f16( %va, half %b, %vc, %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmsub_vf_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfmsub.vf v8, fa0, v16, v0.t
-; CHECK-NEXT: ret
- %elt.head = insertelement poison, half %b, i32 0
- %vb = shufflevector %elt.head, poison, zeroinitializer
- %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
- %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %negvc, %m, i32 %evl)
- ret %v
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %head = insertelement poison, i1 true, i32 0
+ %m = shufflevector %head, poison, zeroinitializer
+ %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl)
+ %v = call @llvm.vp.fma.nxv32f16( %va, %b, %negc, %m, i32 %evl)
+ ret %v
+}
+
+define @vfmsub_vf_nxv32f16( %va, half %b, %vc, %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmsub_vf_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement poison, half %b, i32 0
+ %vb = shufflevector %elt.head, poison, zeroinitializer
+ %negvc = call @llvm.vp.fneg.nxv32f16( %vc, %m, i32 %evl)
+ %v = call @llvm.vp.fma.nxv32f16( %va, %vb, %negvc, %m, i32 %evl)
+ ret %v
}
define @vfmsub_vf_nxv32f16_commute( %va, half %b, %vc, %m, i32 zeroext %evl) {
@@ -3557,10 +4743,100 @@
define @vfnmadd_vv_nxv32f16( %va, %b, %c, %m, i32 zeroext %evl) {
; CHECK-LABEL: vfnmadd_vv_nxv32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) @@ -3571,9 +4847,99 @@ define @vfnmadd_vv_nxv32f16_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv32f16_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add 
a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) @@ -3584,9 +4950,99 @@ define @vfnmadd_vv_nxv32f16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3599,9 +5055,99 @@ define @vfnmadd_vv_nxv32f16_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv32f16_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 
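; NOTE: a hand-decoded reading of the .cfi_escape byte string that recurs in these
; prologues, assuming standard DWARF opcode numbering and the RISC-V convention of
; mapping CSRs to DWARF registers at 4096 + csrno (a reconstruction, not generated
; output):
;   0x0f 0x0d              DW_CFA_def_cfa_expression, 13-byte expression
;   0x72 0x00              DW_OP_breg2 (x2, i.e. sp) + 0
;   0x11 0x10              DW_OP_consts 16
;   0x22                   DW_OP_plus
;   0x11 0x08              DW_OP_consts 8
;   0x92 0xa2 0x38 0x00    DW_OP_bregx reg 0x1c22 (the vlenb CSR) + 0
;   0x1e                   DW_OP_mul
;   0x22                   DW_OP_plus
; i.e. CFA = sp + 16 + 8 * vlenb, matching the trailing assembler comment.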
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
  %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
  %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -3734,10 +5280,100 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vfnmsub_vv_nxv32f16:
; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t
; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
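; NOTE: the spill/reload addressing in these prologues and epilogues computes one
; vlenb-sized slot per saved vector register; reading the address arithmetic back
; out (an annotation, not part of the generated checks), with x = vlenb and all
; offsets relative to sp + 16:
;   v24 -> 7*x  (slli 3 then sub: 8x - x)     v28 -> 3*x  (slli 1 then add: 2x + x)
;   v25 -> 6*x  (li 6; mul)                   v29 -> 2*x  (slli 1)
;   v26 -> 5*x  (slli 2 then add: 4x + x)     v30 -> 1*x  (csrr; add)
;   v27 -> 4*x  (slli 2)                      v31 -> 0    (addi a0, sp, 16)
; This exactly fills the 8 * vlenb frame allocated by "slli a2, a2, 3; sub sp, sp, a2".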
%negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) @@ -3748,9 +5384,99 @@ define @vfnmsub_vv_nxv32f16_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv32f16_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv32f16( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv32f16( %c, %m, i32 %evl) @@ -3761,9 +5487,99 @@ define @vfnmsub_vv_nxv32f16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -3776,9 +5592,99 @@ define @vfnmsub_vv_nxv32f16_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv32f16_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -5589,10 +7495,100 @@ define @vfmsub_vv_nxv16f32( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: 
add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv16f32( %va, %b, %negc, %m, i32 %evl) @@ -5602,9 +7598,99 @@ define @vfmsub_vv_nxv16f32_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 
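; NOTE: each epilogue mirrors its prologue, reloading v24-v31 from the same
; vlenb-scaled slots before the frame is released. As I read the patch, these
; functions take and return RVV values, so they are now treated as using the
; vector calling convention, under which (per the proposed ABI) v24-v31 are
; callee-saved; that appears to be the source of the new spill/reload churn here.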
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -5672,10 +7758,100 @@ define @vfnmadd_vv_nxv16f32( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) @@ -5686,9 +7862,99 @@ define @vfnmadd_vv_nxv16f32_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv16f32_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) @@ -5699,9 +7965,99 @@ define @vfnmadd_vv_nxv16f32_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -5714,9 +8070,99 @@ define @vfnmadd_vv_nxv16f32_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv16f32_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; 
CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -5849,10 +8295,100 @@ define @vfnmsub_vv_nxv16f32( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: 
vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) @@ -5863,9 +8399,99 @@ define @vfnmsub_vv_nxv16f32_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 
16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv16f32( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv16f32( %c, %m, i32 %evl) @@ -5876,9 +8502,99 @@ define @vfnmsub_vv_nxv16f32_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; 
CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -5891,9 +8607,99 @@ define @vfnmsub_vv_nxv16f32_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi 
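Each of the prologues above pins the CFA with a .cfi_escape, because the frame size 16 + 8 * vlenb is only known at run time. The byte string is a DW_CFA_def_cfa_expression; the annotated table below is an illustrative decode using the standard DWARF opcode values and the RISC-V convention of numbering CSRs from 4096 (it is not part of the patch itself):

#include <cstdint>

// Illustrative decode of the recurring CFI escape
//   0x0f,0x0d,0x72,0x00,0x11,0x10,0x22,0x11,0x08,0x92,0xa2,0x38,0x00,0x1e,0x22
// which evaluates, left to right on the DWARF expression stack, to
// sp + 16 + 8 * vlenb, matching the comment the test emits.
struct CfiByte { uint8_t Value; const char *Meaning; };
static const CfiByte CfaExpr[] = {
    {0x0f, "DW_CFA_def_cfa_expression"},
    {0x0d, "13 expression bytes follow"},
    {0x72, "DW_OP_breg2: x2 (sp) plus..."},
    {0x00, "...SLEB128 offset 0"},
    {0x11, "DW_OP_consts..."},
    {0x10, "...16, the scalar part of the frame"},
    {0x22, "DW_OP_plus: sp + 16"},
    {0x11, "DW_OP_consts..."},
    {0x08, "...8, one slot per spilled vector register"},
    {0x92, "DW_OP_bregx: register as ULEB128..."},
    {0xa2, "...0xa2 0x38 decodes to 0x1c22,"},
    {0x38, "...i.e. 4096 + 0xc22, the vlenb CSR..."},
    {0x00, "...plus SLEB128 offset 0"},
    {0x1e, "DW_OP_mul: 8 * vlenb"},
    {0x22, "DW_OP_plus: sp + 16 + 8 * vlenb"},
};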
a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7283,10 +10089,100 @@ define @vfmsub_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) %v = call @llvm.vp.fma.nxv8f64( %va, %b, %negc, %m, i32 %evl) @@ -7296,9 +10192,99 @@ define @vfmsub_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7366,10 +10352,100 @@ define @vfnmadd_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli 
a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7380,9 +10456,99 @@ define @vfnmadd_vv_nxv8f64_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; 
CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7393,9 +10559,99 @@ define @vfnmadd_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; 
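The slot addresses in these spill and reload runs are all of the form k * vlenb + 16, and the generated code strength-reduces each multiple: 7 becomes a shift and a subtract (the slli/sub pair just above), 5 and 3 a shift and an add, powers of two a single slli, and only k = 6 pays for a li/mul. A minimal sketch of the same scaling, with a hypothetical helper name scaleVlenb:

#include <cstdint>

// Compute k * vlenb the way the generated prologues do: shifts and
// add/sub where possible, an explicit multiply only for k = 6.
uint64_t scaleVlenb(uint64_t vlenb, unsigned k) {
  switch (k) {
  case 7: return (vlenb << 3) - vlenb; // slli a3, a2, 3 ; sub a2, a3, a2
  case 6: return vlenb * 6;            // li a3, 6 ; mul a2, a2, a3
  case 5: return (vlenb << 2) + vlenb; // slli a3, a2, 2 ; add a2, a3, a2
  case 4: return vlenb << 2;           // slli a2, a2, 2
  case 3: return (vlenb << 1) + vlenb; // slli a3, a2, 1 ; add a2, a3, a2
  case 2: return vlenb << 1;           // slli a2, a2, 1
  case 1: return vlenb;                // no scaling needed
  default: return vlenb * k;           // not used by these hunks
  }
}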
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7408,9 +10664,99 @@ define @vfnmadd_vv_nxv8f64_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7543,10 +10889,100 @@ define @vfnmsub_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr 
a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7557,9 +10993,99 @@ define 
@vfnmsub_vv_nxv8f64_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi 
sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7570,9 +11096,99 @@ define @vfnmsub_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; 
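Putting the offsets together: the prologue reserves 8 * vlenb bytes above a 16-byte scalar area, vs1r.v/vl1r.v always move exactly vlenb bytes (hence the "Unknown-size Folded Spill/Reload" annotations), and v24 through v31 occupy descending slots, v24 at 7 * vlenb + 16 down to v31 at 16. A sketch of that mapping, with an illustrative helper name:

#include <cstdint>

// Offset of the spill slot for vN (24 <= N <= 31) relative to the
// adjusted sp, matching the addresses computed in the hunks above:
// v24 -> 7*vlenb + 16, v25 -> 6*vlenb + 16, ..., v31 -> 0*vlenb + 16.
uint64_t spillSlotOffset(unsigned vreg, uint64_t vlenb) {
  unsigned k = 31 - vreg;  // v31 sits at the bottom of the block
  return k * vlenb + 16;   // 16-byte scalar area below the slots
}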
CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7585,9 +11201,99 @@ define @vfnmsub_vv_nxv8f64_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmacc-vp.ll @@ -647,10 +647,100 @@ define @vfmacc_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu ; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 
1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -662,10 +752,100 @@ define @vfmacc_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 
2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -725,10 +905,100 @@ define @vfmacc_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv32f16_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1285,10 +1555,100 @@ define @vfmacc_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1300,10 +1660,100 @@ define @vfmacc_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, 
a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1363,10 +1813,100 @@ define @vfmacc_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv16f32_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfmacc.vv v24, v8, v16, 
v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1796,10 +2336,100 @@ define @vfmacc_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; 
CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1811,10 +2441,100 @@ define @vfmacc_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded 
Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vfmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1874,10 +2594,100 @@ define @vfmacc_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv8f64_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vfmacc.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
   %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-constrained-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfmacc and vfmadd by using different operand orders to
@@ -132,9 +132,99 @@
 define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) strictfp {
 ; CHECK-LABEL: vfmadd_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+;
CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmacc.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %vd = call @llvm.experimental.constrained.fma.nxv32f16( %vc, %vb, %va, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %vd @@ -253,9 +343,99 @@ define @vfmadd_vv_nxv16f32( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfmadd_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %vd = call @llvm.experimental.constrained.fma.nxv16f32( %vc, %va, %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %vd @@ -350,9 +530,99 @@ define @vfmadd_vv_nxv8f64( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfmadd_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi 
sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmacc.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %vd = call @llvm.experimental.constrained.fma.nxv8f64( %vb, %vc, %va, metadata !"round.dynamic", metadata !"fpexcept.strict") 
   ret <vscale x 8 x double> %vd
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfmacc and vfmadd by using different operand orders to
@@ -132,9 +132,99 @@
 define <vscale x 32 x half> @vfmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) {
 ; CHECK-LABEL: vfmadd_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vfmacc.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %vd = call @llvm.fma.v32f16( %vc, %vb, %va) ret %vd @@ -253,9 +343,99 @@ define @vfmadd_vv_nxv16f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 
+; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %vd = call @llvm.fma.v16f32( %vc, %va, %vb) ret %vd @@ -350,9 +530,99 @@ define @vfmadd_vv_nxv8f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmadd_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmacc.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %vd = call <vscale x 8 x double> @llvm.fma.v8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %va)
   ret <vscale x 8 x double> %vd
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmax.ll b/llvm/test/CodeGen/RISCV/rvv/vfmax.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmax.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmax.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmax.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
@@ -271,9 +271,97 @@
 define <vscale x 32 x half> @intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ;
CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv32f16.nxv32f16( @@ -507,9 +595,97 @@ define @intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; 
CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmax.mask.nxv16f32.nxv16f32( @@ -696,9 +872,97 @@ define @intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmax_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: 
vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vfmax.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmax.mask.nxv8f64.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmin.ll b/llvm/test/CodeGen/RISCV/rvv/vfmin.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmin.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN:   -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN:   -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmin.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
@@ -271,9 +271,97 @@
 define <vscale x 32 x half> @intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 
16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmin.mask.nxv32f16.nxv32f16( @@ -507,9 +595,97 @@ define @intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfmin.mask.nxv16f32.nxv16f32(
@@ -696,9 +872,97 @@
 define <vscale x 8 x double> @intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmin_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vfmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vfmin.mask.nxv8f64.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsac-vp.ll
@@ -687,10 +687,100 @@
 define <vscale x 32 x half> @vmfsac_vv_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmfsac_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu ; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -703,10 +793,100 @@ define @vmfsac_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, 
a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -770,10 +950,100 @@ define @vmfsac_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv32f16_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, 
a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1365,10 +1635,100 @@ define @vmfsac_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1381,10 +1741,100 @@ define @vmfsac_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1448,10 +1898,100 
@@ define @vmfsac_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv16f32_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, 
sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1908,10 +2448,100 @@ define @vmfsac_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1924,10 +2554,100 @@ define @vmfsac_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vfmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1991,10 +2711,100 @@ define @vmfsac_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmfsac_vv_nxv8f64_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # 
Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-constrained-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfmsac and vfmsub by using different operand orders to
@@ -142,9 +142,99 @@
 define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) strictfp {
 ; CHECK-LABEL: vfmsub_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) #
a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.experimental.constrained.fma.nxv32f16( %vc, %vb, %neg, metadata !"round.dynamic", metadata !"fpexcept.strict") @@ -273,9 +363,99 @@ define @vfmsub_vv_nxv16f32( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; 
CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.experimental.constrained.fma.nxv16f32( %vc, %va, %neg, metadata !"round.dynamic", metadata !"fpexcept.strict") @@ -378,9 +558,99 @@ define @vfmsub_vv_nxv8f64( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi 
a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vfmsac.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %neg = fneg <vscale x 8 x double> %va
 %vd = call <vscale x 8 x double> @llvm.experimental.constrained.fma.nxv8f64(<vscale x 8 x double> %vb, <vscale x 8 x double> %vc, <vscale x 8 x double> %neg, metadata !"round.dynamic", metadata !"fpexcept.strict")
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfmsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
 
 ; This tests a mix of vfmsac and vfmsub by using different operand orders to
@@ -142,9 +142,99 @@
 define <vscale x 32 x half> @vfmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) {
 ; CHECK-LABEL: vfmsub_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1,
a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfmsac.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v32f16( %vc, %vb, %neg) @@ -273,9 +363,99 @@ define @vfmsub_vv_nxv16f32( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; 
CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v16f32( %vc, %va, %neg) @@ -378,9 +558,99 @@ define @vfmsub_vv_nxv8f64( %va, %vb, %vc) { ; CHECK-LABEL: vfmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfmsac.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v8f64( %vb, %vc, %neg) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmul.ll b/llvm/test/CodeGen/RISCV/rvv/vfmul.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmul.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc 
-mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfmul.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
@@ -287,11 +287,99 @@ define <vscale x 32 x half> @intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: fsrmi a0, 0
 ; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t
 ; CHECK-NEXT: fsrm a0
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi
a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv32f16.nxv32f16( @@ -538,11 +626,99 @@ define @intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv16f32.nxv16f32( @@ -739,11 +915,99 @@ define @intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfmul_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfmul.vv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 
16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfmul.mask.nxv8f64.nxv8f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ -394,10 +394,100 @@ define @vfma_vv_nxv32f16( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fmuladd.nxv32f16( %va, %b, %c, %m, i32 %evl) ret %v @@ -406,9 +496,99 @@ define @vfma_vv_nxv32f16_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -781,10 +961,100 @@ define @vfma_vv_nxv16f32( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fmuladd.nxv16f32( %va, %b, %c, %m, i32 %evl) ret %v @@ -793,9 +1063,99 @@ define @vfma_vv_nxv16f32_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -1091,10 +1451,100 @@ define @vfma_vv_nxv7f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv7f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fmuladd.nxv7f64( %va, %b, %c, %m, i32 %evl) ret %v @@ -1103,9 +1553,99 @@ define @vfma_vv_nxv7f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv7f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, 
a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -1118,10 +1658,100 @@ define @vfma_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fmuladd.nxv8f64( %va, %b, %c, %m, i32 %evl) ret %v @@ -1130,9 +1760,99 @@ define @vfma_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v16, v24 +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -1200,29 +1920,83 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: li a3, 50 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb -; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x32, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 50 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 48 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: li a3, 47 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; CHECK-NEXT: slli a3, a1, 3 -; CHECK-NEXT: add a5, a2, a3 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 24 -; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: li a3, 46 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 45 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 44 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, 
a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 43 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 42 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 41 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 40 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vmv1r.v v1, v0 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: add a5, a2, a3 +; CHECK-NEXT: vl8re64.v v8, (a5) +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 24 +; CHECK-NEXT: mul a5, a5, a6 ; CHECK-NEXT: add a5, sp, a5 ; CHECK-NEXT: addi a5, a5, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill @@ -1288,8 +2062,62 @@ ; CHECK-NEXT: addi a0, a0, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 48 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 47 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 46 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 45 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 44 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 43 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 42 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 41 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 50 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -1303,10 +2131,99 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: li a3, 40 ; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 38 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 37 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 36 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 35 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 34 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 5 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 5 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 5 +; CHECK-NEXT: sub a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 30 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 29 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 28 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 27 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; 
CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -1353,8 +2270,97 @@ ; CHECK-NEXT: vfmadd.vv v0, v24, v8 ; CHECK-NEXT: vmv.v.v v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 38 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 37 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 36 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 35 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 34 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 5 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 5 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 40 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -3474,10 +4480,100 @@ define @vfmsub_vv_nxv32f16( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload 
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
 %v = call <vscale x 32 x half> @llvm.vp.fmuladd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %negc, <vscale x 32 x i1> %m, i32 %evl)
@@ -3487,28 +4583,118 @@ define <vscale x 32 x half> @vfmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmsub_vv_nxv32f16_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v16, v24
-; CHECK-NEXT: ret
- %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
- %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
- %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
- %v = call <vscale x 32 x half> @llvm.vp.fmuladd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %negc, <vscale x 32 x i1> %m, i32 %evl)
- ret <vscale x 32 x half> %v
-}
-
-define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b, <vscale x 32 x half> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) {
-; CHECK-LABEL: vfmsub_vf_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
-; CHECK-NEXT: vfmsub.vf v8, fa0, v16, v0.t
-; CHECK-NEXT: ret
- %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
- %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
- %negvc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %vc, <vscale x 32 x i1> %m, i32 %evl)
- %v = call <vscale x 32 x half> @llvm.vp.fmuladd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %negvc, <vscale x 32 x i1> %m, i32 %evl)
- ret <vscale x 32 x half> %v
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+ %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+ %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
+ %v = call <vscale x 32 x half> @llvm.vp.fmuladd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %negc, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x half> %v
+}
+
+define <vscale x 32 x half> @vfmsub_vf_nxv32f16(<vscale x 32 x half> %va, half %b, <vscale x 32 x half> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vfmsub_vf_nxv32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
+; CHECK-NEXT: vfmsub.vf v8, fa0, v16, v0.t
+; CHECK-NEXT: ret
+ %elt.head = insertelement <vscale x 32 x half> poison, half %b, i32 0
+ %vb = shufflevector <vscale x 32 x half> %elt.head, <vscale x 32 x half> poison, <vscale x 32 x i32> zeroinitializer
+ %negvc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %vc, <vscale x 32 x i1> %m, i32 %evl)
+ %v = call <vscale x 32 x half> @llvm.vp.fmuladd.nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %negvc, <vscale x 32 x i1> %m, i32 %evl)
+ ret <vscale x 32 x half> %v
 }

 define <vscale x 32 x half> @vfmsub_vf_nxv32f16_commute(<vscale x 32 x half> %va, half %b, <vscale x 32 x half> %vc, <vscale x 32 x i1> %m, i32 zeroext %evl) {
@@ -3557,10 +4743,100 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
 %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3571,9 +4847,99 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_commuted(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv32f16_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
 %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3584,9 +4950,99 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv32f16_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -3599,9 +5055,99 @@ define <vscale x 32 x half> @vfnmadd_vv_nxv32f16_unmasked_commuted(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv32f16_unmasked_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -3734,10 +5280,100 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
 %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3748,9 +5384,99 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_commuted(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv32f16_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %b, <vscale x 32 x i1> %m, i32 %evl)
 %negc = call <vscale x 32 x half> @llvm.vp.fneg.nxv32f16(<vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 %evl)
@@ -3761,9 +5487,99 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv32f16_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -3776,9 +5592,99 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16_unmasked_commuted(<vscale x 32 x half> %va, <vscale x 32 x half> %b, <vscale x 32 x half> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv32f16_unmasked_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 32 x i1> %head, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -5589,10 +7495,100 @@ define <vscale x 16 x float> @vfmsub_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmsub_vv_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfmsub.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
 %v = call <vscale x 16 x float> @llvm.vp.fmuladd.nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %negc, <vscale x 16 x i1> %m, i32 %evl)
@@ -5602,9 +7598,99 @@ define <vscale x 16 x float> @vfmsub_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfmsub_vv_nxv16f32_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfmsub.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -5672,10 +7758,100 @@ define <vscale x 16 x float> @vfnmadd_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
 %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -5686,9 +7862,99 @@ define <vscale x 16 x float> @vfnmadd_vv_nxv16f32_commuted(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv16f32_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
 %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -5699,9 +7965,99 @@ define <vscale x 16 x float> @vfnmadd_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv16f32_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -5714,9 +8070,99 @@ define <vscale x 16 x float> @vfnmadd_vv_nxv16f32_unmasked_commuted(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmadd_vv_nxv16f32_unmasked_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %head = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
 %m = shufflevector <vscale x 16 x i1> %head, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
@@ -5849,10 +8295,100 @@ define <vscale x 16 x float> @vfnmsub_vv_nxv16f32(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv16f32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
 %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -5863,9 +8399,99 @@ define <vscale x 16 x float> @vfnmsub_vv_nxv16f32_commuted(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv16f32_commuted:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %negb = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %b, <vscale x 16 x i1> %m, i32 %evl)
 %negc = call <vscale x 16 x float> @llvm.vp.fneg.nxv16f32(<vscale x 16 x float> %c, <vscale x 16 x i1> %m, i32 %evl)
@@ -5876,9 +8502,99 @@ define <vscale x 16 x float> @vfnmsub_vv_nxv16f32_unmasked(<vscale x 16 x float> %va, <vscale x 16 x float> %b, <vscale x 16 x float> %c, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmsub_vv_nxv16f32_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v
v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -5891,9 +8607,99 @@ define @vfnmsub_vv_nxv16f32_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7283,10 +10089,100 @@ define @vfmsub_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) %v = call @llvm.vp.fmuladd.nxv8f64( %va, %b, %negc, %m, i32 %evl) @@ -7296,9 +10192,99 @@ define @vfmsub_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmsub_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded 
Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmsub.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7366,10 +10352,100 @@ define @vfnmadd_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7380,9 +10456,99 @@ define @vfnmadd_vv_nxv8f64_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: 
add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7393,9 +10559,99 @@ define @vfnmadd_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 
+; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7408,9 +10664,99 @@ define @vfnmadd_vv_nxv8f64_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmadd_vv_nxv8f64_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: 
vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7543,10 +10889,100 @@ define @vfnmsub_vv_nxv8f64( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7557,9 +10993,99 @@ define @vfnmsub_vv_nxv8f64_commuted( %va, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: 
csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %negb = call @llvm.vp.fneg.nxv8f64( %b, %m, i32 %evl) %negc = call @llvm.vp.fneg.nxv8f64( %c, %m, i32 %evl) @@ -7570,9 +11096,99 @@ define @vfnmsub_vv_nxv8f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 
+; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer @@ -7585,9 +11201,99 @@ define @vfnmsub_vv_nxv8f64_unmasked_commuted( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64_unmasked_commuted: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %head = insertelement poison, i1 true, i32 0 %m = shufflevector %head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfneg-vp.ll @@ -394,6 +394,14 @@ define @vfneg_vv_nxv16f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: 
csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -412,6 +420,12 @@
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfneg.v v8, v8, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call <vscale x 16 x double> @llvm.vp.fneg.nxv16f64(<vscale x 16 x double> %va, <vscale x 16 x i1> %m, i32 %evl)
 ret <vscale x 16 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmacc-vp.ll
@@ -727,10 +727,100 @@
 define <vscale x 32 x half> @vfnmacc_vv_nxv32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b, <vscale x 32 x half> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vfnmacc_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu
 ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1,
a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -744,10 +834,100 @@ define @vfnmacc_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, 
sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -815,10 +995,100 @@ define @vfnmacc_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv32f16_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi 
a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1445,10 +1715,100 @@ define @vfnmacc_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, 
v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1462,10 +1822,100 @@ define @vfnmacc_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: 
addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1533,10 +1983,100 @@ define @vfnmacc_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv16f32_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size 
Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2020,10 +2560,100 @@ define @vfnmacc_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded 
Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2037,10 +2667,100 @@ define @vfnmacc_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vfnmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2108,10 +2828,100 @@ define @vfnmacc_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv8f64_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vfnmacc.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
 %allones = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-constrained-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s

 ; This tests a mix of vfnmacc and vfnmadd by using different operand orders to
@@ -152,9 +152,99 @@ define <vscale x 32 x half> @vfnmsub_vv_nxv32f16(<vscale x 32 x half> %va, <vscale x 32 x half> %vb, <vscale x 32 x half> %vc) strictfp {
 ; CHECK-LABEL: vfnmsub_vv_nxv32f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vfnmadd.vv v8, v24, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %neg = fneg <vscale x 32 x half> %vc
 %neg2 = fneg <vscale x 32 x half> %vb
@@
-293,9 +383,99 @@ define @vfnmsub_vv_nxv16f32( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfnmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 
16 ; CHECK-NEXT: ret %neg = fneg %va %neg2 = fneg %vb @@ -406,9 +586,99 @@ define @vfnmsub_vv_nxv8f64( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfnmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfnmacc.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vb %neg2 = fneg %va diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmadd-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfnmacc and vfnmadd by using different operand orders to @@ -152,9 +152,99 @@ define @vfnmsub_vv_nxv32f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli 
a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vc %neg2 = fneg %vb @@ -293,9 +383,99 @@ define @vfnmsub_vv_nxv16f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfnmadd.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload 
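; The save/restore traffic in these hunks is the cost side of this change:
; functions such as vfnmsub_vv_nxv16f32 take and return scalable vectors,
; so they now get the vector calling convention's callee-saved set, which
; includes v24-v31. The lowering loads one operand into the v24-v31 group
; (vl8re32.v v24), so the function must spill and reload those eight
; registers, hence the 8 * vlenb frame, and offsets such as 6 * vlenb are
; formed with li+mul, which is why the RUN lines gained +m.
;
; A minimal caller-side sketch of the benefit, assuming a hypothetical
; external callee @vec_callee that is not part of this patch: because a
; callee whose arguments or return value live in RVV registers now
; preserves v24-v31, a caller may keep a vector live in that range across
; the call instead of spilling it around the call site.
;
; declare <vscale x 8 x i32> @vec_callee(<vscale x 8 x i32>)
;
; define <vscale x 8 x i32> @demo(<vscale x 8 x i32> %x, <vscale x 8 x i32> %y) {
;   ; The call below is switched to the vector calling convention during
;   ; call lowering because its argument and result are assigned to RVV
;   ; registers, so v24-v31 are preserved and %y need not be spilled.
;   %r = call <vscale x 8 x i32> @vec_callee(<vscale x 8 x i32> %x)
;   %sum = add <vscale x 8 x i32> %r, %y
;   ret <vscale x 8 x i32> %sum
; }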
+; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %va %neg2 = fneg %vb @@ -406,9 +586,99 @@ define @vfnmsub_vv_nxv8f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfnmacc.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: 
vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vb %neg2 = fneg %va diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsac-vp.ll @@ -687,10 +687,100 @@ define @vfnmsac_vv_nxv32f16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: 
mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -703,10 +793,100 @@ define @vfnmsac_vv_nxv32f16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv32f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -770,10 +950,100 @@ define @vfnmsac_vv_nxv32f16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv32f16_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, 
e16, m8, ta, mu ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1365,10 +1635,100 @@ define @vfnmsac_vv_nxv16f32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1381,10 +1741,100 @@ define @vfnmsac_vv_nxv16f32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv16f32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi 
a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1448,10 +1898,100 @@ define @vfnmsac_vv_nxv16f32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv16f32_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, 
a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1908,10 +2448,100 @@ define @vfnmsac_vv_nxv8f64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, 
(a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1924,10 +2554,100 @@ define @vfnmsac_vv_nxv8f64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: 
vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vfnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1991,10 +2711,100 @@ define @vfnmsac_vv_nxv8f64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmsac_vv_nxv8f64_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size 
Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-constrained-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 
-mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfnmsac and vfnmsub by using different operand orders to @@ -142,9 +142,99 @@ define @vfnmsub_vv_nxv32f16( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfnmsub_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfnmsub.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vc %vd = call @llvm.experimental.constrained.fma.nxv32f16( %neg, %va, %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") @@ -273,9 +363,99 @@ define @vfnmsub_vv_nxv16f32( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfnmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfnmsub.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.experimental.constrained.fma.nxv16f32( %vc, %neg, %vb, metadata !"round.dynamic", metadata !"fpexcept.strict") @@ -378,9 +558,99 @@ define @vfnmsub_vv_nxv8f64( %va, %vb, %vc) strictfp { ; CHECK-LABEL: vfnmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfnmsac.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb 
+; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.experimental.constrained.fma.nxv8f64( %neg, %vc, %va, metadata !"round.dynamic", metadata !"fpexcept.strict") diff --git a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfnmsub-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -target-abi=ilp32d \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=ilp32d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -target-abi=lp64d \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s ; This tests a mix of vfnmsac and vfnmsub by using different operand orders to @@ -142,9 +142,99 @@ define @vfnmsub_vv_nxv32f16( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv32f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vfnmsub.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, 
sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vc %vd = call @llvm.fma.v32f16( %neg, %va, %vb) @@ -273,9 +363,99 @@ define @vfnmsub_vv_nxv16f32( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv16f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-NEXT: vfnmsub.vv v8, v24, v16 +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %va %vd = call @llvm.fma.v16f32( %vc, %neg, %vb) @@ -378,9 +558,99 @@ define @vfnmsub_vv_nxv8f64( %va, %vb, %vc) { ; CHECK-LABEL: vfnmsub_vv_nxv8f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, 
ma ; CHECK-NEXT: vfnmsac.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %neg = fneg %vb %vd = call @llvm.fma.v8f64( %neg, %vc, %va) diff --git a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfpext-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.fpext.nxv2f32.nxv2f16(, , i32) @@ -94,6 +94,52 @@ define @vfpext_nxv32f16_nxv32f32( %a, %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_nxv32f16_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add 
a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -114,6 +160,50 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfwcvt.f.f.v v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fpext.nxv32f32.nxv32f16( %a, %m, i32 %vl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptosi-vp.ll @@ -314,6 +314,52 @@ define @vfptosi_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i16_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add 
a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -334,6 +380,50 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfncvt.rtz.x.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v @@ -344,6 +434,14 @@ define @vfptosi_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_nxv32i32_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: 
vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -363,6 +461,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptosi.nxv32i32.nxv32f32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptoui-vp.ll @@ -314,6 +314,52 @@ define @vfptoui_nxv32i16_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i16_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -334,6 +380,50 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfncvt.rtz.xu.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi 
a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i16.nxv32f32( %va, %m, i32 %evl) ret %v @@ -344,6 +434,14 @@ define @vfptoui_nxv32i32_nxv32f32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_nxv32i32_nxv32f32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -363,6 +461,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.fptoui.nxv32i32.nxv32f32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -97,9 +97,62 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 14 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 13 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 12 +; 
CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 11 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 10 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -124,7 +177,60 @@ ; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -140,9 +246,69 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 
0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -209,7 +375,67 @@ ; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, 
(a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 19
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 26
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnj.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \
 ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s
 declare <vscale x 1 x half> @llvm.riscv.vfsgnj.nxv1f16.nxv1f16(
   <vscale x 1 x half>,
@@ -271,9 +271,97 @@
 define <vscale x 32 x half> @intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x half> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv32f16_nxv32f16_nxv32f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vfsgnj.mask.nxv32f16.nxv32f16(
@@ -507,9 +595,97 @@
 define <vscale x 16 x float> @intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x float> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv16f32_nxv16f32_nxv16f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vfsgnj.mask.nxv16f32.nxv16f32(
@@ -696,9 +872,97 @@
 define <vscale x 8 x double> @intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x double> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vfsgnj_mask_vv_nxv8f64_nxv8f64_nxv8f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:
vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnj.mask.nxv8f64.nxv8f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjn.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsgnjn.nxv1f16.nxv1f16( , @@ -271,9 +271,97 @@ define @intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv32f16.nxv32f16( @@ -507,9 +595,97 @@ define @intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 
+; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv16f32.nxv16f32( @@ -696,9 +872,97 @@ define @intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjn_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfsgnjn.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjn.mask.nxv8f64.nxv8f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsgnjx.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsgnjx.nxv1f16.nxv1f16( , @@ -271,9 +271,97 @@ define @intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv32f16.nxv32f16( @@ -507,9 +595,97 @@ define @intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv16f32.nxv16f32( @@ -696,9 +872,97 @@ define @intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsgnjx_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfsgnjx.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsgnjx.mask.nxv8f64.nxv8f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -394,6 +394,14 @@ define @vfsqrt_vv_nxv16f64( %va, %m, 
i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_nxv16f64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 3 @@ -412,6 +420,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfsqrt.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.sqrt.nxv16f64( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsub.ll b/llvm/test/CodeGen/RISCV/rvv/vfsub.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsub.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfsub.nxv1f16.nxv1f16( , @@ -265,9 +265,97 @@ define @intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv32f16_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: 
vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv32f16.nxv32f16( @@ -496,9 +584,97 @@ define @intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv16f32_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: 
vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv16f32.nxv16f32( @@ -681,9 +857,97 @@ define @intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfsub_mask_vv_nxv8f64_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli 
zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vfsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfsub.mask.nxv8f64.nxv8f64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16( , @@ -237,11 +237,53 @@ define @intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: 
fsrmi a0, 0 ; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv16f32.nxv16f16( @@ -438,11 +480,53 @@ define @intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_mask_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfwadd.wv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.mask.nxv8f64.nxv8f32( @@ -1383,11 +1467,99 @@ define @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; 
CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfwadd.wv v24, v16, v8 ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwmacc-vp.ll @@ -829,12 +829,56 @@ define @vfmacc_vv_nxv8f64_nxv8f16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 
+ 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; CHECK-NEXT: vfwcvt.f.f.v v24, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwmacc.vv v16, v12, v24, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %aext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %a, %m, i32 %evl) %bext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %b, %m, i32 %evl) @@ -845,12 +889,56 @@ define @vfmacc_vv_nxv8f64_nxv8f16_unmasked( %a, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfmacc_vv_nxv8f64_nxv8f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v12, v8 ; CHECK-NEXT: vfwcvt.f.f.v v24, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwmacc.vv v16, v12, v24 ; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwnmacc-vp.ll @@ -939,12 +939,56 @@ define @vfnmacc_vv_nxv8f64_nxv4f16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv8f64_nxv4f16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; CHECK-NEXT: vfwcvt.f.f.v v24, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwnmacc.vv v16, v12, v24, v0.t ; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %aext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %a, %m, i32 %evl) %bext = call @llvm.vp.fpext.nxv8f64.nxv8f16( %b, %m, i32 %evl) @@ -957,12 +1001,56 @@ define @vfnmacc_vv_nxv8f64_nxv4f16_unmasked( %a, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfnmacc_vv_nxv8f64_nxv4f16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: 
vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvt.f.f.v v12, v8 ; CHECK-NEXT: vfwcvt.f.f.v v24, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfwnmacc.vv v16, v12, v24 ; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll --- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=ilp32d | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+zvfh \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+zvfh \ ; RUN: -verify-machineinstrs -target-abi=lp64d | FileCheck %s declare @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16( , @@ -237,11 +237,53 @@ define @intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv16f32_nxv16f32_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, 
a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv16f32.nxv16f16( @@ -438,11 +480,53 @@ define @intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_mask_wv_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfwsub.wv v8, v16, v24, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.mask.nxv8f64.nxv8f32( @@ -1383,11 +1467,99 @@ define @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; 
CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfwsub.wv v24, v16, v8 ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vghsh.ll b/llvm/test/CodeGen/RISCV/rvv/vghsh.ll --- a/llvm/test/CodeGen/RISCV/rvv/vghsh.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vghsh.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvkg \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvkg \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvkg \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvkg \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK 
declare <vscale x 1 x i32> @llvm.riscv.vghsh.nxv1i32.nxv1i32( @@ -110,9 +110,97 @@ define <vscale x 16 x i32> @intrinsic_vghsh_vv_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vghsh_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vghsh.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vghsh.nxv16i32.nxv16i32( diff
--git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv32.ll @@ -21,9 +21,53 @@ define <vscale x 16 x i16> @test_vloxseg2_mask_nxv16i16_nxv16i16(<vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i16> %index, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i16(<vscale x 16 x i16> %val, <vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1) @@ -50,9 +94,53 @@ define <vscale x 16 x i16> @test_vloxseg2_mask_nxv16i16_nxv16i8(<vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i8> %index, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT:
vloxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i8(<vscale x 16 x i16> %val, <vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1) @@ -79,9 +167,53 @@ define <vscale x 16 x i16> @test_vloxseg2_mask_nxv16i16_nxv16i32(<vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i32> %index, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i32(<vscale x 16 x i16> %val, <vscale x 16 x i16> %val, ptr %base, <vscale x 16 x i32> %index, <vscale x 16 x i1> %mask, i32 %vl, i32 1) @@ -108,9 +240,23 @@ define <vscale x 1 x i8> @test_vloxseg2_mask_nxv1i8_nxv1i8(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i8> %index, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +;
CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -137,9 +283,23 @@ define @test_vloxseg2_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -166,9 +326,23 @@ define @test_vloxseg2_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -195,11 +369,25 @@ define @test_vloxseg3_mask_nxv1i8_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -226,11 +414,25 @@ define @test_vloxseg3_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -257,11 +459,25 @@ define @test_vloxseg3_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -813,9 +1029,31 @@ define @test_vloxseg2_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv16i8_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i8.nxv16i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -842,9 +1080,31 @@ define @test_vloxseg2_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i8.nxv16i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -871,9 +1131,31 @@ define @test_vloxseg2_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv16i8_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i8.nxv16i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -900,10 +1182,32 @@ define @test_vloxseg3_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv16i8_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv16i8.nxv16i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -930,11 +1234,33 @@ define @test_vloxseg3_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv16i8.nxv16i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -961,10 +1287,32 @@ define @test_vloxseg3_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv16i8_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} 
@llvm.riscv.vloxseg3.mask.nxv16i8.nxv16i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -991,12 +1339,34 @@ define @test_vloxseg4_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv16i8_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg4ei16.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv16i8.nxv16i16( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1056,11 +1426,33 @@ define @test_vloxseg4_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv16i8_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv16i8.nxv16i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1087,9 +1479,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # 
Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1116,9 +1522,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1145,9 +1565,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1174,11 +1608,25 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; 
CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1205,11 +1653,25 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1236,11 +1698,25 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1792,9 +2268,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1821,9 +2311,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i8( %val, ptr %base, 
%index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1850,9 +2354,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1879,11 +2397,25 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1910,11 +2442,25 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1941,10 +2487,24 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2037,12 +2597,26 @@ define @test_vloxseg4_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4i16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2495,9 +3069,23 @@ define @test_vloxseg2_mask_nxv1i32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: 
vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2524,9 +3112,23 @@ define @test_vloxseg2_mask_nxv1i32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2553,9 +3155,23 @@ define @test_vloxseg2_mask_nxv1i32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2582,11 +3198,25 @@ define @test_vloxseg3_mask_nxv1i32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2613,11 +3243,25 @@ define @test_vloxseg3_mask_nxv1i32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2644,11 +3288,25 @@ define @test_vloxseg3_mask_nxv1i32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3200,9 +3858,31 @@ define @test_vloxseg2_mask_nxv8i16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3229,9 +3909,31 @@ define @test_vloxseg2_mask_nxv8i16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3258,9 +3960,31 @@ define @test_vloxseg2_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3287,11 +4011,33 @@ define @test_vloxseg3_mask_nxv8i16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3318,11 +4064,33 @@ define @test_vloxseg3_mask_nxv8i16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3349,10 +4117,32 @@ define @test_vloxseg3_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i32( %val, %val, 
%val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3445,12 +4235,34 @@ define @test_vloxseg4_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv8i16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8i16.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3477,9 +4289,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3506,9 +4332,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i8( 
%val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3535,9 +4375,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3564,10 +4418,24 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3594,11 +4462,25 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3625,10 +4507,24 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3655,12 +4551,26 @@ define @test_vloxseg4_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg4ei16.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8i8.nxv8i16( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3720,11 +4630,25 @@ define @test_vloxseg4_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3819,12 +4743,26 @@ define @test_vloxseg5_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg5_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg5ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vloxseg5.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3921,6 +4859,14 @@ define @test_vloxseg6_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg6_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3929,6 +4875,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg6ei32.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vloxseg6.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4174,9 +5126,53 @@ define @test_vloxseg2_mask_nxv8i32_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4203,9 +5199,53 @@ define @test_vloxseg2_mask_nxv8i32_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4232,9 +5272,53 @@ define @test_vloxseg2_mask_nxv8i32_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4261,9 +5345,23 @@ define @test_vloxseg2_mask_nxv4i8_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4290,9 +5388,23 @@ define @test_vloxseg2_mask_nxv4i8_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4319,9 +5431,23 @@ define @test_vloxseg2_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i32: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4348,11 +5474,25 @@ define @test_vloxseg3_mask_nxv4i8_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4379,11 +5519,25 @@ define @test_vloxseg3_mask_nxv4i8_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4410,10 +5564,24 @@ define @test_vloxseg3_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4506,12 +5674,26 @@ define @test_vloxseg4_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv4i8_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4i8.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4964,9 +6146,23 @@ define @test_vloxseg2_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4993,9 +6189,23 @@ define @test_vloxseg2_mask_nxv1i16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5022,9 +6232,23 @@ define @test_vloxseg2_mask_nxv1i16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5051,11 +6275,25 @@ define @test_vloxseg3_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5082,11 +6320,25 @@ define @test_vloxseg3_mask_nxv1i16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} 
@llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5113,11 +6365,25 @@ define @test_vloxseg3_mask_nxv1i16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5669,9 +6935,53 @@ define @test_vloxseg2_mask_nxv32i8_nxv32i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv32i8_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv32i8.nxv32i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5698,9 +7008,53 @@ define @test_vloxseg2_mask_nxv32i8_nxv32i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv32i8.nxv32i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5727,9 +7081,23 @@ define @test_vloxseg2_mask_nxv2i8_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5756,9 +7124,23 @@ define @test_vloxseg2_mask_nxv2i8_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # 
Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5785,9 +7167,23 @@ define @test_vloxseg2_mask_nxv2i8_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5814,11 +7210,25 @@ define @test_vloxseg3_mask_nxv2i8_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5845,11 +7255,25 @@ define @test_vloxseg3_mask_nxv2i8_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5876,11 +7300,25 @@ define @test_vloxseg3_mask_nxv2i8_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -6432,9 +7870,23 @@ define @test_vloxseg2_mask_nxv2i16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -6461,9 +7913,23 @@ define @test_vloxseg2_mask_nxv2i16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -6490,9 +7956,23 @@ define @test_vloxseg2_mask_nxv2i16_nxv2i16( %val, ptr %base, %index, i32 
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6519,11 +7999,25 @@
 define @test_vloxseg3_mask_nxv2i16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6550,11 +8044,25 @@
 define @test_vloxseg3_mask_nxv2i16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6581,11 +8089,25 @@
 define @test_vloxseg3_mask_nxv2i16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7137,9 +8659,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7166,9 +8710,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7195,9 +8761,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7224,11 +8812,33 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7255,11 +8865,33 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7286,11 +8918,33 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7416,9 +9070,53 @@
 define @test_vloxseg2_mask_nxv16f16_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16f16.nxv16i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7445,9 +9143,53 @@
 define @test_vloxseg2_mask_nxv16f16_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16f16.nxv16i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7474,9 +9216,53 @@
 define @test_vloxseg2_mask_nxv16f16_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16f16.nxv16i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7503,9 +9289,53 @@
 define @test_vloxseg2_mask_nxv4f64_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7532,9 +9362,53 @@
 define @test_vloxseg2_mask_nxv4f64_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7561,9 +9435,53 @@
 define @test_vloxseg2_mask_nxv4f64_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7590,9 +9508,23 @@
 define @test_vloxseg2_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7619,9 +9551,23 @@
 define @test_vloxseg2_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7648,9 +9594,23 @@
 define @test_vloxseg2_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7677,11 +9637,25 @@
 define @test_vloxseg3_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7708,11 +9682,25 @@
 define @test_vloxseg3_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -7739,11 +9727,25 @@
 define @test_vloxseg3_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -8295,9 +10297,23 @@
 define @test_vloxseg2_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -8324,9 +10340,23 @@
 define @test_vloxseg2_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -8353,9 +10383,23 @@
 define @test_vloxseg2_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -8382,11 +10426,25 @@
 define @test_vloxseg3_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -8413,11 +10471,25 @@
 define @test_vloxseg3_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -8444,11 +10516,25 @@
 define @test_vloxseg3_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9000,9 +11086,23 @@
 define @test_vloxseg2_mask_nxv1f16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9029,9 +11129,23 @@
 define @test_vloxseg2_mask_nxv1f16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9058,9 +11172,23 @@
 define @test_vloxseg2_mask_nxv1f16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9087,11 +11215,25 @@
 define @test_vloxseg3_mask_nxv1f16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9118,11 +11260,25 @@
 define @test_vloxseg3_mask_nxv1f16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9149,11 +11305,25 @@
 define @test_vloxseg3_mask_nxv1f16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9705,9 +11875,23 @@
 define @test_vloxseg2_mask_nxv1f32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9734,9 +11918,23 @@
 define @test_vloxseg2_mask_nxv1f32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9763,9 +11961,23 @@
 define @test_vloxseg2_mask_nxv1f32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9792,11 +12004,25 @@
 define @test_vloxseg3_mask_nxv1f32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9823,11 +12049,25 @@
 define @test_vloxseg3_mask_nxv1f32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -9854,11 +12094,25 @@
 define @test_vloxseg3_mask_nxv1f32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10410,9 +12664,31 @@
 define @test_vloxseg2_mask_nxv8f16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10439,9 +12715,31 @@
 define @test_vloxseg2_mask_nxv8f16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10468,9 +12766,31 @@
 define @test_vloxseg2_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10497,11 +12817,33 @@
 define @test_vloxseg3_mask_nxv8f16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10528,11 +12870,33 @@
 define @test_vloxseg3_mask_nxv8f16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10559,10 +12923,32 @@
 define @test_vloxseg3_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10655,12 +13041,34 @@
 define @test_vloxseg4_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv8f16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8f16.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10687,9 +13095,53 @@
 define @test_vloxseg2_mask_nxv8f32_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f32.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10716,9 +13168,53 @@
 define @test_vloxseg2_mask_nxv8f32_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f32.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10745,9 +13241,53 @@
 define @test_vloxseg2_mask_nxv8f32_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f32.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10774,9 +13314,31 @@
 define @test_vloxseg2_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10803,9 +13365,31 @@
 define @test_vloxseg2_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10832,9 +13416,31 @@
 define @test_vloxseg2_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10861,11 +13467,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10892,11 +13520,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -10923,11 +13573,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11053,9 +13725,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11082,9 +13768,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11111,9 +13811,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11140,11 +13854,25 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11171,11 +13899,25 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11202,10 +13944,24 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11298,12 +14054,26 @@
 define @test_vloxseg4_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv4f16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4f16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11756,9 +14526,23 @@
 define @test_vloxseg2_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11785,9 +14569,23 @@
 define @test_vloxseg2_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11814,9 +14612,23 @@
 define @test_vloxseg2_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11843,11 +14655,25 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11874,11 +14700,25 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -11905,11 +14745,25 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -12461,9 +15315,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -12490,9 +15366,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -12519,9 +15417,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -12548,11 +15468,33 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -12579,11 +15521,33 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -12610,11 +15574,33 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vloxseg-rv64.ll
@@ -21,9 +21,53 @@
 define @test_vloxseg2_mask_nxv16i16_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -50,9 +94,53 @@
 define @test_vloxseg2_mask_nxv16i16_nxv16i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -79,9 +167,53 @@
 define @test_vloxseg2_mask_nxv16i16_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16i16_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i16.nxv16i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -108,9 +240,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -137,9 +291,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -166,9 +342,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -195,9 +393,31 @@
 define @test_vloxseg2_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i32.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -224,11 +444,33 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -255,11 +497,33 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -286,10 +550,32 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -316,11 +602,33 @@
 define @test_vloxseg3_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -413,12 +721,34 @@
 define @test_vloxseg4_mask_nxv4i32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv4i32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4i32.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -478,9 +808,31 @@
 define @test_vloxseg2_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16i8_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i8.nxv16i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -507,9 +859,31 @@
 define @test_vloxseg2_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv16i8.nxv16i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -536,9 +910,31 @@
 define @test_vloxseg2_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vloxseg2.mask.nxv16i8.nxv16i32(<vscale x 16 x i8> %val, <vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i32> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -565,10 +961,32 @@
 define <vscale x 16 x i8> @test_vloxseg3_mask_nxv16i8_nxv16i16(<vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i16> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vloxseg3.mask.nxv16i8.nxv16i16(<vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -595,11 +1013,33 @@
 define <vscale x 16 x i8> @test_vloxseg3_mask_nxv16i8_nxv16i8(<vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i8> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv16i8_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vloxseg3.mask.nxv16i8.nxv16i8(<vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -626,10 +1066,32 @@
 define <vscale x 16 x i8> @test_vloxseg3_mask_nxv16i8_nxv16i32(<vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i32> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vloxseg3.mask.nxv16i8.nxv16i32(<vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i32> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -656,12 +1118,34 @@
 define <vscale x 16 x i8> @test_vloxseg4_mask_nxv16i8_nxv16i16(<vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i16> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv16i8_nxv16i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vmv4r.v v16, v12
 ; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vloxseg4ei16.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vloxseg4.mask.nxv16i8.nxv16i16(<vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -721,11 +1205,33 @@
 define <vscale x 16 x i8> @test_vloxseg4_mask_nxv16i8_nxv16i32(<vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i32> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv16i8_nxv16i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vloxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vloxseg4.mask.nxv16i8.nxv16i32(<vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, <vscale x 16 x i8> %val, ptr %base, <vscale x 16 x i32> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -752,9 +1258,23 @@
 define <vscale x 1 x i64> @test_vloxseg2_mask_nxv1i64_nxv1i64(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg2.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -781,9 +1301,23 @@
 define <vscale x 1 x i64> @test_vloxseg2_mask_nxv1i64_nxv1i32(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg2.mask.nxv1i64.nxv1i32(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -810,9 +1344,23 @@
 define <vscale x 1 x i64> @test_vloxseg2_mask_nxv1i64_nxv1i16(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg2.mask.nxv1i64.nxv1i16(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -839,9 +1387,23 @@
 define <vscale x 1 x i64> @test_vloxseg2_mask_nxv1i64_nxv1i8(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg2.mask.nxv1i64.nxv1i8(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -868,11 +1430,25 @@
 define <vscale x 1 x i64> @test_vloxseg3_mask_nxv1i64_nxv1i64(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i64_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg3.mask.nxv1i64.nxv1i64(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -899,11 +1475,25 @@
 define <vscale x 1 x i64> @test_vloxseg3_mask_nxv1i64_nxv1i32(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i64_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg3.mask.nxv1i64.nxv1i32(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -930,11 +1520,25 @@
 define <vscale x 1 x i64> @test_vloxseg3_mask_nxv1i64_nxv1i16(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i64_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg3.mask.nxv1i64.nxv1i16(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -961,11 +1565,25 @@
 define <vscale x 1 x i64> @test_vloxseg3_mask_nxv1i64_nxv1i8(<vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i64_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT:    vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vloxseg3.mask.nxv1i64.nxv1i8(<vscale x 1 x i64> %val, <vscale x 1 x i64> %val, <vscale x 1 x i64> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1692,9 +2310,23 @@
 define <vscale x 1 x i32> @test_vloxseg2_mask_nxv1i32_nxv1i64(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i64(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1721,9 +2353,23 @@
 define <vscale x 1 x i32> @test_vloxseg2_mask_nxv1i32_nxv1i32(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1750,9 +2396,23 @@
 define <vscale x 1 x i32> @test_vloxseg2_mask_nxv1i32_nxv1i16(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i16(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1779,9 +2439,23 @@
 define <vscale x 1 x i32> @test_vloxseg2_mask_nxv1i32_nxv1i8(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg2.mask.nxv1i32.nxv1i8(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1808,11 +2482,25 @@
 define <vscale x 1 x i32> @test_vloxseg3_mask_nxv1i32_nxv1i64(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i64(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1839,11 +2527,25 @@
 define <vscale x 1 x i32> @test_vloxseg3_mask_nxv1i32_nxv1i32(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i32(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1870,11 +2572,25 @@
 define <vscale x 1 x i32> @test_vloxseg3_mask_nxv1i32_nxv1i16(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i16(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -1901,11 +2617,25 @@
 define <vscale x 1 x i32> @test_vloxseg3_mask_nxv1i32_nxv1i8(<vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i32_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vloxseg3.mask.nxv1i32.nxv1i8(<vscale x 1 x i32> %val, <vscale x 1 x i32> %val, <vscale x 1 x i32> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -2632,9 +3362,31 @@
 define <vscale x 8 x i16> @test_vloxseg2_mask_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2661,9 +3413,31 @@
 define <vscale x 8 x i16> @test_vloxseg2_mask_nxv8i16_nxv8i8(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i8(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2690,9 +3464,31 @@
 define <vscale x 8 x i16> @test_vloxseg2_mask_nxv8i16_nxv8i64(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i64(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2719,9 +3515,31 @@
 define <vscale x 8 x i16> @test_vloxseg2_mask_nxv8i16_nxv8i32(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg2.mask.nxv8i16.nxv8i32(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2748,11 +3566,33 @@
 define <vscale x 8 x i16> @test_vloxseg3_mask_nxv8i16_nxv8i16(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i16(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2779,11 +3619,33 @@
 define <vscale x 8 x i16> @test_vloxseg3_mask_nxv8i16_nxv8i8(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv1r.v v12, v10
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i8(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2810,10 +3672,32 @@
 define <vscale x 8 x i16> @test_vloxseg3_mask_nxv8i16_nxv8i64(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i64(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2840,10 +3724,32 @@
 define <vscale x 8 x i16> @test_vloxseg3_mask_nxv8i16_nxv8i32(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg3.mask.nxv8i16.nxv8i32(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2936,11 +3842,33 @@
 define <vscale x 8 x i16> @test_vloxseg4_mask_nxv8i16_nxv8i64(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv8i16_nxv8i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg4.mask.nxv8i16.nxv8i64(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2967,12 +3895,34 @@
 define <vscale x 8 x i16> @test_vloxseg4_mask_nxv8i16_nxv8i32(<vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv8i16_nxv8i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv2r.v v6, v8
 ; CHECK-NEXT:    vmv2r.v v10, v8
 ; CHECK-NEXT:    vmv4r.v v16, v12
 ; CHECK-NEXT:    vmv2r.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT:    vloxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>,<vscale x 8 x i16>} @llvm.riscv.vloxseg4.mask.nxv8i16.nxv8i32(<vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, <vscale x 8 x i16> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -2999,9 +3949,23 @@
 define <vscale x 4 x i8> @test_vloxseg2_mask_nxv4i8_nxv4i32(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i32> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i32(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3028,9 +3992,23 @@
 define <vscale x 4 x i8> @test_vloxseg2_mask_nxv4i8_nxv4i8(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i8> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3057,9 +4035,23 @@
 define <vscale x 4 x i8> @test_vloxseg2_mask_nxv4i8_nxv4i64(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i64(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3086,9 +4078,23 @@
 define <vscale x 4 x i8> @test_vloxseg2_mask_nxv4i8_nxv4i16(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i16> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg2.mask.nxv4i8.nxv4i16(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3115,10 +4121,24 @@
 define <vscale x 4 x i8> @test_vloxseg3_mask_nxv4i8_nxv4i32(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i32> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i32(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3145,11 +4165,25 @@
 define <vscale x 4 x i8> @test_vloxseg3_mask_nxv4i8_nxv4i8(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i8> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i8(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3176,10 +4210,24 @@
 define <vscale x 4 x i8> @test_vloxseg3_mask_nxv4i8_nxv4i64(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i64(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3206,11 +4254,25 @@
 define <vscale x 4 x i8> @test_vloxseg3_mask_nxv4i8_nxv4i16(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i16> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4i8_nxv4i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg3.mask.nxv4i8.nxv4i16(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3237,12 +4299,26 @@
 define <vscale x 4 x i8> @test_vloxseg4_mask_nxv4i8_nxv4i32(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i32> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv4i8_nxv4i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vmv2r.v v12, v10
 ; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg4ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg4.mask.nxv4i8.nxv4i32(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3302,11 +4378,25 @@
 define <vscale x 4 x i8> @test_vloxseg4_mask_nxv4i8_nxv4i64(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg4.mask.nxv4i8.nxv4i64(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3434,12 +4524,26 @@
 define <vscale x 4 x i8> @test_vloxseg5_mask_nxv4i8_nxv4i64(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg5_mask_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vmv1r.v v10, v8
 ; CHECK-NEXT:    vmv1r.v v11, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg5ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg5.mask.nxv4i8.nxv4i64(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3570,6 +4674,14 @@
 define <vscale x 4 x i8> @test_vloxseg6_mask_nxv4i8_nxv4i64(<vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg6_mask_nxv4i8_nxv4i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vmv1r.v v10, v8
@@ -3578,6 +4690,12 @@
 ; CHECK-NEXT:    vmv1r.v v12, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT:    vloxseg6ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>,<vscale x 4 x i8>} @llvm.riscv.vloxseg6.mask.nxv4i8.nxv4i64(<vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, <vscale x 4 x i8> %val, ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -3931,9 +5049,23 @@
 define <vscale x 1 x i16> @test_vloxseg2_mask_nxv1i16_nxv1i64(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i16>,<vscale x 1 x i16>} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i64(<vscale x 1 x i16> %val, <vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -3960,9 +5092,23 @@
 define <vscale x 1 x i16> @test_vloxseg2_mask_nxv1i16_nxv1i32(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i16>,<vscale x 1 x i16>} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i32(<vscale x 1 x i16> %val, <vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -3989,9 +5135,23 @@
 define <vscale x 1 x i16> @test_vloxseg2_mask_nxv1i16_nxv1i16(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i16>,<vscale x 1 x i16>} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i16(<vscale x 1 x i16> %val, <vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -4018,9 +5178,23 @@
 define <vscale x 1 x i16> @test_vloxseg2_mask_nxv1i16_nxv1i8(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1i16_nxv1i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i16>,<vscale x 1 x i16>} @llvm.riscv.vloxseg2.mask.nxv1i16.nxv1i8(<vscale x 1 x i16> %val, <vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -4047,11 +5221,25 @@
 define <vscale x 1 x i16> @test_vloxseg3_mask_nxv1i16_nxv1i64(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i64:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i16>,<vscale x 1 x i16>,<vscale x 1 x i16>} @llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i64(<vscale x 1 x i16> %val, <vscale x 1 x i16> %val, <vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -4078,11 +5266,25 @@
 define <vscale x 1 x i16> @test_vloxseg3_mask_nxv1i16_nxv1i32(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 1 x i16>,<vscale x 1 x i16>,<vscale x 1 x i16>} @llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i32(<vscale x 1 x i16> %val, <vscale x 1 x i16> %val, <vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -4109,11 +5311,25 @@
 define <vscale x 1 x i16> @test_vloxseg3_mask_nxv1i16_nxv1i16(<vscale x 1 x i16> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vmv1r.v v7, v8
 ; CHECK-NEXT:    vmv1r.v v10, v9
 ; CHECK-NEXT:    vmv1r.v v9, v8
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT:    vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -4140,11 +5356,25 @@ define @test_vloxseg3_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1i16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -4871,9 +6101,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -4900,9 +6144,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -4929,9 +6187,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -4958,9 +6230,23 @@ define @test_vloxseg2_mask_nxv2i32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2i32_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2i32.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -4987,11 +6273,25 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5018,11 +6318,25 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 
16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5049,11 +6363,25 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5080,10 +6408,24 @@ define @test_vloxseg3_mask_nxv2i32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv2i32_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2i32.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5209,12 +6551,26 @@ define @test_vloxseg4_mask_nxv2i32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv2i32_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, 
m1, ta, mu ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv2i32.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5809,9 +7165,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5838,9 +7208,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5867,9 +7251,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i64( %val, %val, ptr %base, %index, 
%mask, i64 %vl, i64 1) @@ -5896,9 +7294,23 @@ define @test_vloxseg2_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8i8.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5925,10 +7337,24 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5955,11 +7381,25 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -5986,10 +7426,24 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr 
a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6016,10 +7470,24 @@ define @test_vloxseg3_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv8i8.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6046,12 +7514,26 @@ define @test_vloxseg4_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg4ei16.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8i8.nxv8i16( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6111,11 +7593,25 @@ define @test_vloxseg4_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: 
addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6142,11 +7638,25 @@ define @test_vloxseg4_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6241,12 +7751,26 @@ define @test_vloxseg5_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg5_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg5ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vloxseg5.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6273,12 +7797,26 @@ define @test_vloxseg5_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg5_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; 
CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg5ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vloxseg5.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6375,6 +7913,14 @@ define @test_vloxseg6_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg6_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -6382,6 +7928,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg6ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vloxseg6.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6408,6 +7960,14 @@ define @test_vloxseg6_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg6_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -6416,6 +7976,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg6ei32.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vloxseg6.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6514,6 +8080,14 @@ define @test_vloxseg7_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg7_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: 
vmv1r.v v10, v8 @@ -6522,6 +8096,12 @@ ; CHECK-NEXT: vmv1r.v v13, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg7ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vloxseg7.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6658,6 +8238,14 @@ define @test_vloxseg8_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg8_mask_nxv8i8_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -6667,6 +8255,12 @@ ; CHECK-NEXT: vmv1r.v v14, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vloxseg8ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vloxseg8.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6730,9 +8324,53 @@ define @test_vloxseg2_mask_nxv4i64_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i64.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6759,9 +8397,53 @@ define @test_vloxseg2_mask_nxv4i64_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i64.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6788,9 +8470,53 @@ define @test_vloxseg2_mask_nxv4i64_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; 
CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vloxseg2ei64.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i64.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6817,9 +8543,53 @@ define @test_vloxseg2_mask_nxv4i64_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i64_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i64.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6846,9 +8616,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub 
sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6875,9 +8659,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6904,9 +8702,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6933,9 +8745,23 @@ define @test_vloxseg2_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4i16.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6962,10 +8788,24 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -6992,11 +8832,25 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7023,10 +8877,24 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} 
@llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7053,11 +8921,25 @@ define @test_vloxseg3_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4i16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7084,12 +8966,26 @@ define @test_vloxseg4_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4i16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7149,11 +9045,25 @@ define @test_vloxseg4_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv4i16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4i16.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7281,12 +9191,26 @@ define 
@test_vloxseg5_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg5_mask_nxv4i16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg5ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vloxseg5.mask.nxv4i16.nxv4i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7417,6 +9341,14 @@ define @test_vloxseg6_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg6_mask_nxv4i16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -7425,6 +9357,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg6ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vloxseg6.mask.nxv4i16.nxv4i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7778,9 +9716,23 @@ define @test_vloxseg2_mask_nxv1i8_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7807,9 +9759,23 @@ define @test_vloxseg2_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i32: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7836,9 +9802,23 @@ define @test_vloxseg2_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7865,9 +9845,23 @@ define @test_vloxseg2_mask_nxv1i8_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv1i8.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7894,11 +9888,25 @@ define @test_vloxseg3_mask_nxv1i8_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill 
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i64(<vscale x 1 x i8> %val, <vscale x 1 x i8> %val, <vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -7925,11 +9933,25 @@
 define <vscale x 1 x i8> @test_vloxseg3_mask_nxv1i8_nxv1i32(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i32(<vscale x 1 x i8> %val, <vscale x 1 x i8> %val, <vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -7956,11 +9978,25 @@
 define <vscale x 1 x i8> @test_vloxseg3_mask_nxv1i8_nxv1i16(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i16(<vscale x 1 x i8> %val, <vscale x 1 x i8> %val, <vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -7987,11 +10023,25 @@
 define <vscale x 1 x i8> @test_vloxseg3_mask_nxv1i8_nxv1i8(<vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vloxseg3.mask.nxv1i8.nxv1i8(<vscale x 1 x i8> %val, <vscale x 1 x i8> %val, <vscale x 1 x i8> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -8718,9 +10768,23 @@
 define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_nxv2i32(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i32(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8747,9 +10811,23 @@
 define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_nxv2i8(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8776,9 +10854,23 @@
 define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_nxv2i16(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i16(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8805,9 +10897,23 @@
 define <vscale x 2 x i8> @test_vloxseg2_mask_nxv2i8_nxv2i64(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i8_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg2.mask.nxv2i8.nxv2i64(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8834,11 +10940,25 @@
 define <vscale x 2 x i8> @test_vloxseg3_mask_nxv2i8_nxv2i32(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i32(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8865,11 +10985,25 @@
 define <vscale x 2 x i8> @test_vloxseg3_mask_nxv2i8_nxv2i8(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i8(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8896,11 +11030,25 @@
 define <vscale x 2 x i8> @test_vloxseg3_mask_nxv2i8_nxv2i16(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i16(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -8927,10 +11075,24 @@
 define <vscale x 2 x i8> @test_vloxseg3_mask_nxv2i8_nxv2i64(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i8_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg3.mask.nxv2i8.nxv2i64(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9056,12 +11218,26 @@
 define <vscale x 2 x i8> @test_vloxseg4_mask_nxv2i8_nxv2i64(<vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv2i8_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vloxseg4.mask.nxv2i8.nxv2i64(<vscale x 2 x i8> %val, <vscale x 2 x i8> %val, <vscale x 2 x i8> %val, <vscale x 2 x i8> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9656,9 +11832,53 @@
 define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_nxv8i16(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i16(<vscale x 8 x i32> %val, <vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -9685,9 +11905,53 @@
 define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_nxv8i8(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i8(<vscale x 8 x i32> %val, <vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -9714,9 +11978,53 @@
 define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_nxv8i64(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i64(<vscale x 8 x i32> %val, <vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -9743,9 +12051,53 @@
 define <vscale x 8 x i32> @test_vloxseg2_mask_nxv8i32_nxv8i32(<vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv8i32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vloxseg2.mask.nxv8i32.nxv8i32(<vscale x 8 x i32> %val, <vscale x 8 x i32> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -9772,9 +12124,53 @@
 define <vscale x 32 x i8> @test_vloxseg2_mask_nxv32i8_nxv32i16(<vscale x 32 x i8> %val, ptr %base, <vscale x 32 x i16> %index, i64 %vl, <vscale x 32 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv32i8_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 32 x i8>,<vscale x 32 x i8>} @llvm.riscv.vloxseg2.mask.nxv32i8.nxv32i16(<vscale x 32 x i8> %val, <vscale x 32 x i8> %val, ptr %base, <vscale x 32 x i16> %index, <vscale x 32 x i1> %mask, i64 %vl, i64 1)
@@ -9801,9 +12197,53 @@
 define <vscale x 32 x i8> @test_vloxseg2_mask_nxv32i8_nxv32i8(<vscale x 32 x i8> %val, ptr %base, <vscale x 32 x i8> %index, i64 %vl, <vscale x 32 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 32 x i8>,<vscale x 32 x i8>} @llvm.riscv.vloxseg2.mask.nxv32i8.nxv32i8(<vscale x 32 x i8> %val, <vscale x 32 x i8> %val, ptr %base, <vscale x 32 x i8> %index, <vscale x 32 x i1> %mask, i64 %vl, i64 1)
@@ -9830,9 +12270,23 @@
 define <vscale x 2 x i16> @test_vloxseg2_mask_nxv2i16_nxv2i32(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i32(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9859,9 +12313,23 @@
 define <vscale x 2 x i16> @test_vloxseg2_mask_nxv2i16_nxv2i8(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9888,9 +12356,23 @@
 define <vscale x 2 x i16> @test_vloxseg2_mask_nxv2i16_nxv2i16(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9917,9 +12399,23 @@
 define <vscale x 2 x i16> @test_vloxseg2_mask_nxv2i16_nxv2i64(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i16_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg2.mask.nxv2i16.nxv2i64(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9946,11 +12442,25 @@
 define <vscale x 2 x i16> @test_vloxseg3_mask_nxv2i16_nxv2i32(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i32(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -9977,11 +12487,25 @@
 define <vscale x 2 x i16> @test_vloxseg3_mask_nxv2i16_nxv2i8(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i8(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10008,11 +12532,25 @@
 define <vscale x 2 x i16> @test_vloxseg3_mask_nxv2i16_nxv2i16(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i16(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10039,10 +12577,24 @@
 define <vscale x 2 x i16> @test_vloxseg3_mask_nxv2i16_nxv2i64(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i16_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg3.mask.nxv2i16.nxv2i64(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10168,12 +12720,26 @@
 define <vscale x 2 x i16> @test_vloxseg4_mask_nxv2i16_nxv2i64(<vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv2i16_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vloxseg4.mask.nxv2i16.nxv2i64(<vscale x 2 x i16> %val, <vscale x 2 x i16> %val, <vscale x 2 x i16> %val, <vscale x 2 x i16> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10768,9 +13334,31 @@
 define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_nxv2i32(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg2.mask.nxv2i64.nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10797,9 +13385,31 @@
 define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_nxv2i8(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg2.mask.nxv2i64.nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10826,9 +13436,31 @@
 define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_nxv2i16(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg2.mask.nxv2i64.nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10855,9 +13487,31 @@
 define <vscale x 2 x i64> @test_vloxseg2_mask_nxv2i64_nxv2i64(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg2.mask.nxv2i64.nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10884,11 +13538,33 @@
 define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_nxv2i32(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg3.mask.nxv2i64.nxv2i32(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10915,11 +13591,33 @@
 define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_nxv2i8(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg3.mask.nxv2i64.nxv2i8(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10946,11 +13644,33 @@
 define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_nxv2i16(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg3.mask.nxv2i64.nxv2i16(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -10977,11 +13697,33 @@
 define <vscale x 2 x i64> @test_vloxseg3_mask_nxv2i64_nxv2i64(<vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2i64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i64>,<vscale x 2 x i64>,<vscale x 2 x i64>} @llvm.riscv.vloxseg3.mask.nxv2i64.nxv2i64(<vscale x 2 x i64> %val, <vscale x 2 x i64> %val, <vscale x 2 x i64> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -11140,9 +13882,53 @@
 define <vscale x 16 x half> @test_vloxseg2_mask_nxv16f16_nxv16i16(<vscale x 16 x half> %val, ptr %base, <vscale x 16 x i16> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 16 x half>,<vscale x 16 x half>} @llvm.riscv.vloxseg2.mask.nxv16f16.nxv16i16(<vscale x 16 x half> %val, <vscale x 16 x half> %val, ptr %base, <vscale x 16 x i16> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -11169,9 +13955,53 @@
 define <vscale x 16 x half> @test_vloxseg2_mask_nxv16f16_nxv16i8(<vscale x 16 x half> %val, ptr %base, <vscale x 16 x i8> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 16 x half>,<vscale x 16 x half>} @llvm.riscv.vloxseg2.mask.nxv16f16.nxv16i8(<vscale x 16 x half> %val, <vscale x 16 x half> %val, ptr %base, <vscale x 16 x i8> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -11198,9 +14028,53 @@
 define <vscale x 16 x half> @test_vloxseg2_mask_nxv16f16_nxv16i32(<vscale x 16 x half> %val, ptr %base, <vscale x 16 x i32> %index, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv16f16_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 16 x half>,<vscale x 16 x half>} @llvm.riscv.vloxseg2.mask.nxv16f16.nxv16i32(<vscale x 16 x half> %val, <vscale x 16 x half> %val, ptr %base, <vscale x 16 x i32> %index, <vscale x 16 x i1> %mask, i64 %vl, i64 1)
@@ -11227,9 +14101,53 @@
 define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_nxv4i32(<vscale x 4 x double> %val, ptr %base, <vscale x 4 x i32> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 4 x double>,<vscale x 4 x double>} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i32(<vscale x 4 x double> %val, <vscale x 4 x double> %val, ptr %base, <vscale x 4 x i32> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -11256,9 +14174,53 @@
 define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_nxv4i8(<vscale x 4 x double> %val, ptr %base, <vscale x 4 x i8> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 4 x double>,<vscale x 4 x double>} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i8(<vscale x 4 x double> %val, <vscale x 4 x double> %val, ptr %base, <vscale x 4 x i8> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -11285,9 +14247,53 @@
 define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_nxv4i64(<vscale x 4 x double> %val, ptr %base, <vscale x 4 x i64> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 4 x double>,<vscale x 4 x double>} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i64(<vscale x 4 x double> %val, <vscale x 4 x double> %val, ptr %base, <vscale x 4 x i64> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -11314,9 +14320,53 @@
 define <vscale x 4 x double> @test_vloxseg2_mask_nxv4f64_nxv4i16(<vscale x 4 x double> %val, ptr %base, <vscale x 4 x i16> %index, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f64_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 4 x double>,<vscale x 4 x double>} @llvm.riscv.vloxseg2.mask.nxv4f64.nxv4i16(<vscale x 4 x double> %val, <vscale x 4 x double> %val, ptr %base, <vscale x 4 x i16> %index, <vscale x 4 x i1> %mask, i64 %vl, i64 1)
@@ -11343,9 +14393,23 @@
 define <vscale x 1 x double> @test_vloxseg2_mask_nxv1f64_nxv1i64(<vscale x 1 x double> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x double>,<vscale x 1 x double>} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i64(<vscale x 1 x double> %val, <vscale x 1 x double> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -11372,9 +14436,23 @@
 define <vscale x 1 x double> @test_vloxseg2_mask_nxv1f64_nxv1i32(<vscale x 1 x double> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x double>,<vscale x 1 x double>} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i32(<vscale x 1 x double> %val, <vscale x 1 x double> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -11401,9 +14479,23 @@
 define <vscale x 1 x double> @test_vloxseg2_mask_nxv1f64_nxv1i16(<vscale x 1 x double> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x double>,<vscale x 1 x double>} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i16(<vscale x 1 x double> %val, <vscale x 1 x double> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -11430,9 +14522,23 @@
 define <vscale x 1 x double> @test_vloxseg2_mask_nxv1f64_nxv1i8(<vscale x 1 x double> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv1f64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x double>,<vscale x 1 x double>} @llvm.riscv.vloxseg2.mask.nxv1f64.nxv1i8(<vscale x 1 x double> %val, <vscale x 1 x double> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -11459,11 +14565,25 @@
 define <vscale x 1 x double> @test_vloxseg3_mask_nxv1f64_nxv1i64(<vscale x 1 x double> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero,
a1, e64, m1, ta, mu ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -11490,11 +14610,25 @@ define @test_vloxseg3_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -11521,11 +14655,25 @@ define @test_vloxseg3_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -11552,11 +14700,25 @@ define @test_vloxseg3_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg3_mask_nxv1f64_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv1f64.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -12283,9 +15445,23 @@ define @test_vloxseg2_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -12312,9 +15488,23 @@ define @test_vloxseg2_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -12341,9 +15531,23 @@ define @test_vloxseg2_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -12370,9 +15574,23 @@ define @test_vloxseg2_mask_nxv2f32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: 
; CHECK-LABEL: test_vloxseg2_mask_nxv2f32_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>} @llvm.riscv.vloxseg2.mask.nxv2f32.nxv2i64(<vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -12399,11 +15617,25 @@
define <vscale x 2 x float> @test_vloxseg3_mask_nxv2f32_nxv2i32(<vscale x 2 x float> %val, ptr %base, <vscale x 2 x i32> %index, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i32(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i32> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -12430,11 +15662,25 @@
define <vscale x 2 x float> @test_vloxseg3_mask_nxv2f32_nxv2i8(<vscale x 2 x float> %val, ptr %base, <vscale x 2 x i8> %index, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i8(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i8> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -12461,11 +15707,25 @@
define <vscale x 2 x float> @test_vloxseg3_mask_nxv2f32_nxv2i16(<vscale x 2 x float> %val, ptr %base, <vscale x 2 x i16> %index, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i16(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i16> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -12492,10 +15752,24 @@
define <vscale x 2 x float> @test_vloxseg3_mask_nxv2f32_nxv2i64(<vscale x 2 x float> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv2f32_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>} @llvm.riscv.vloxseg3.mask.nxv2f32.nxv2i64(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -12621,12 +15895,26 @@
define <vscale x 2 x float> @test_vloxseg4_mask_nxv2f32_nxv2i64(<vscale x 2 x float> %val, ptr %base, <vscale x 2 x i64> %index, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vloxseg4_mask_nxv2f32_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: vmv1r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>} @llvm.riscv.vloxseg4.mask.nxv2f32.nxv2i64(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i64> %index, <vscale x 2 x i1> %mask, i64 %vl, i64 1)
@@ -13221,9 +16509,23 @@
define <vscale x 1 x half> @test_vloxseg2_mask_nxv1f16_nxv1i64(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i64(<vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13250,9 +16552,23 @@
define <vscale x 1 x half> @test_vloxseg2_mask_nxv1f16_nxv1i32(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i32(<vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13279,9 +16595,23 @@
define <vscale x 1 x half> @test_vloxseg2_mask_nxv1f16_nxv1i16(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i16(<vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13308,9 +16638,23 @@
define <vscale x 1 x half> @test_vloxseg2_mask_nxv1f16_nxv1i8(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f16_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg2.mask.nxv1f16.nxv1i8(<vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13337,11 +16681,25 @@
define <vscale x 1 x half> @test_vloxseg3_mask_nxv1f16_nxv1i64(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i64(<vscale x 1 x half> %val, <vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13368,11 +16726,25 @@
define <vscale x 1 x half> @test_vloxseg3_mask_nxv1f16_nxv1i32(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i32(<vscale x 1 x half> %val, <vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13399,11 +16771,25 @@
define <vscale x 1 x half> @test_vloxseg3_mask_nxv1f16_nxv1i16(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i16(<vscale x 1 x half> %val, <vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -13430,11 +16816,25 @@
define <vscale x 1 x half> @test_vloxseg3_mask_nxv1f16_nxv1i8(<vscale x 1 x half> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f16_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x half>,<vscale x 1 x half>,<vscale x 1 x half>} @llvm.riscv.vloxseg3.mask.nxv1f16.nxv1i8(<vscale x 1 x half> %val, <vscale x 1 x half> %val, <vscale x 1 x half> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14161,9 +17561,23 @@
define <vscale x 1 x float> @test_vloxseg2_mask_nxv1f32_nxv1i64(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i64(<vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14190,9 +17604,23 @@
define <vscale x 1 x float> @test_vloxseg2_mask_nxv1f32_nxv1i32(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i32(<vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14219,9 +17647,23 @@
define <vscale x 1 x float> @test_vloxseg2_mask_nxv1f32_nxv1i16(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i16(<vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14248,9 +17690,23 @@
define <vscale x 1 x float> @test_vloxseg2_mask_nxv1f32_nxv1i8(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv1f32_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg2.mask.nxv1f32.nxv1i8(<vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14277,11 +17733,25 @@
define <vscale x 1 x float> @test_vloxseg3_mask_nxv1f32_nxv1i64(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i64> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i64(<vscale x 1 x float> %val, <vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i64> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14308,11 +17778,25 @@
define <vscale x 1 x float> @test_vloxseg3_mask_nxv1f32_nxv1i32(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i32> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i32(<vscale x 1 x float> %val, <vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i32> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14339,11 +17823,25 @@
define <vscale x 1 x float> @test_vloxseg3_mask_nxv1f32_nxv1i16(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i16> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i16(<vscale x 1 x float> %val, <vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i16> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -14370,11 +17868,25 @@
define <vscale x 1 x float> @test_vloxseg3_mask_nxv1f32_nxv1i8(<vscale x 1 x float> %val, ptr %base, <vscale x 1 x i8> %index, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv1f32_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v10, v9
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x float>,<vscale x 1 x float>,<vscale x 1 x float>} @llvm.riscv.vloxseg3.mask.nxv1f32.nxv1i8(<vscale x 1 x float> %val, <vscale x 1 x float> %val, <vscale x 1 x float> %val, ptr %base, <vscale x 1 x i8> %index, <vscale x 1 x i1> %mask, i64 %vl, i64 1)
@@ -15101,9 +18613,31 @@
define <vscale x 8 x half> @test_vloxseg2_mask_nxv8f16_nxv8i16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i16(<vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15130,9 +18664,31 @@
define <vscale x 8 x half> @test_vloxseg2_mask_nxv8f16_nxv8i8(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i8(<vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15159,9 +18715,31 @@
define <vscale x 8 x half> @test_vloxseg2_mask_nxv8f16_nxv8i64(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg2ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i64(<vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15188,9 +18766,31 @@
define <vscale x 8 x half> @test_vloxseg2_mask_nxv8f16_nxv8i32(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv8f16_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg2.mask.nxv8f16.nxv8i32(<vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15217,11 +18817,33 @@
define <vscale x 8 x half> @test_vloxseg3_mask_nxv8f16_nxv8i16(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v12, v10
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i16(<vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i16> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15248,11 +18870,33 @@
define <vscale x 8 x half> @test_vloxseg3_mask_nxv8f16_nxv8i8(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv1r.v v12, v10
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i8(<vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i8> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15279,10 +18923,32 @@
define <vscale x 8 x half> @test_vloxseg3_mask_nxv8f16_nxv8i64(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg3ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i64(<vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15309,10 +18975,32 @@
define <vscale x 8 x half> @test_vloxseg3_mask_nxv8f16_nxv8i32(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg3_mask_nxv8f16_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg3.mask.nxv8f16.nxv8i32(<vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15405,11 +19093,33 @@
define <vscale x 8 x half> @test_vloxseg4_mask_nxv8f16_nxv8i64(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i64> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg4_mask_nxv8f16_nxv8i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vmv2r.v v12, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg4.mask.nxv8f16.nxv8i64(<vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i64> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15436,12 +19146,34 @@
define <vscale x 8 x half> @test_vloxseg4_mask_nxv8f16_nxv8i32(<vscale x 8 x half> %val, ptr %base, <vscale x 8 x i32> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg4_mask_nxv8f16_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vmv4r.v v16, v12
; CHECK-NEXT: vmv2r.v v12, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vloxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>,<vscale x 8 x half>} @llvm.riscv.vloxseg4.mask.nxv8f16.nxv8i32(<vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, <vscale x 8 x half> %val, ptr %base, <vscale x 8 x i32> %index, <vscale x 8 x i1> %mask, i64 %vl, i64 1)
@@ -15468,9 +19200,53 @@
define <vscale x 8 x float> @test_vloxseg2_mask_nxv8f32_nxv8i16(<vscale x 8 x float> %val, ptr %base, <vscale x 8 x i16> %index, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vloxseg2ei64.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f32.nxv8i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15555,9 +19419,53 @@ define @test_vloxseg2_mask_nxv8f32_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv8f32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv8f32.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15584,9 +19492,31 @@ define @test_vloxseg2_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vloxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15613,9 +19543,31 @@ define @test_vloxseg2_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15642,9 +19594,31 @@ define @test_vloxseg2_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * 
vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15671,9 +19645,31 @@
 define @test_vloxseg2_mask_nxv2f64_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f64.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15700,11 +19696,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call
{,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15731,11 +19749,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15762,11 +19802,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15793,11 +19855,33 @@
 define @test_vloxseg3_mask_nxv2f64_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT:
vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f64.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15956,9 +20040,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15985,9 +20083,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16014,9 +20126,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ;
CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16043,9 +20169,23 @@
 define @test_vloxseg2_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f16.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16072,10 +20212,24 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16102,11 +20256,25 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT:
addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16133,10 +20301,24 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16163,11 +20345,25 @@
 define @test_vloxseg3_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16194,12 +20390,26 @@
 define @test_vloxseg4_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv4f16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vloxseg4ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4f16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@
-16259,11 +20469,25 @@ define @test_vloxseg4_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg4_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4f16.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16391,12 +20615,26 @@ define @test_vloxseg5_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg5_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg5ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vloxseg5.mask.nxv4f16.nxv4i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16527,6 +20765,14 @@ define @test_vloxseg6_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vloxseg6_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -16535,6 +20781,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vloxseg6ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vloxseg6.mask.nxv4f16.nxv4i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16888,9 +21140,23 @@ define 
@test_vloxseg2_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16917,9 +21183,23 @@
 define @test_vloxseg2_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16946,9 +21226,23 @@
 define @test_vloxseg2_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -16975,9 +21269,23 @@
 define @test_vloxseg2_mask_nxv2f16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv2f16_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22,
0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv2f16.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17004,11 +21312,25 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17035,11 +21357,25 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17066,11 +21402,25 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ;
CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17097,10 +21447,24 @@
 define @test_vloxseg3_mask_nxv2f16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv2f16_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv2f16.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17226,12 +21590,26 @@
 define @test_vloxseg4_mask_nxv2f16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv2f16_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vloxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv2f16.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17826,9 +22204,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei32.v v6,
(a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17855,9 +22255,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17884,9 +22306,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17913,9 +22357,31 @@
 define @test_vloxseg2_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg2_mask_nxv4f32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+;
CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vloxseg2.mask.nxv4f32.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17942,11 +22408,33 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -17973,11 +22461,33 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp,
16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -18004,10 +22514,32 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -18034,11 +22566,33 @@
 define @test_vloxseg3_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg3_mask_nxv4f32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vloxseg3.mask.nxv4f32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -18131,12 +22685,34 @@
 define @test_vloxseg4_mask_nxv4f32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vloxseg4_mask_nxv4f32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72,
0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vloxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vloxseg4.mask.nxv4f32.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
@@ -8,8 +8,52 @@
 define @test_vlseg2_nxv16i16(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv16i16( undef, undef, ptr %base, i32 %vl)
@@ -20,10 +64,54 @@
 define @test_vlseg2_mask_nxv16i16(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+;
CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vlseg2e16.v v4, (a0)
 ; CHECK-NEXT: vmv4r.v v8, v4
 ; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv16i16( undef, undef, ptr %base, i32 %vl)
@@ -39,8 +127,22 @@
 define @test_vlseg2_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i8( undef, undef, ptr %base, i32 %vl)
@@ -51,10 +153,24 @@
 define @test_vlseg2_mask_nxv1i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg2e8.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT:
slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i8( undef, undef, ptr %base, i32 %vl)
@@ -70,8 +186,22 @@
 define @test_vlseg3_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i8( undef, undef, undef, ptr %base, i32 %vl)
@@ -82,11 +212,25 @@
 define @test_vlseg3_mask_nxv1i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg3e8.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i8( undef, undef, undef, ptr %base, i32 %vl)
@@ -102,8 +246,22 @@
 define @test_vlseg4_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i8( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -114,12 +272,26 @@
 define @test_vlseg4_mask_nxv1i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi
a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg4e8.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i8( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -135,8 +307,22 @@
 define @test_vlseg5_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -147,6 +333,14 @@
 define @test_vlseg5_mask_nxv1i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg5e8.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -154,6 +348,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -169,8 +369,22 @@
 define @test_vlseg6_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i8(
undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -181,6 +395,14 @@
 define @test_vlseg6_mask_nxv1i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg6e8.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -189,6 +411,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -204,8 +432,22 @@
 define @test_vlseg7_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -216,6 +458,14 @@
 define @test_vlseg7_mask_nxv1i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg7e8.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -225,6 +475,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -240,8 +496,22 @@
 define @test_vlseg8_nxv1i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp,
a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -252,6 +522,14 @@ define @test_vlseg8_mask_nxv1i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vlseg8e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -262,6 +540,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -277,8 +561,30 @@ define @test_vlseg2_nxv16i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg2e8.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv16i8( undef, undef, ptr %base, i32 %vl) @@ -289,10 +595,32 @@ define @test_vlseg2_mask_nxv16i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; 
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vlseg2e8.v v6, (a0)
 ; CHECK-NEXT:    vmv2r.v v8, v6
 ; CHECK-NEXT:    vlseg2e8.v v6, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg2.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i32 %vl)
@@ -308,8 +636,30 @@
 define <vscale x 16 x i8> @test_vlseg3_nxv16i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vlseg3e8.v v6, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg3.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i32 %vl)
@@ -320,11 +670,33 @@
 define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vlseg3e8.v v6, (a0)
 ; CHECK-NEXT:    vmv2r.v v8, v6
 ; CHECK-NEXT:    vmv2r.v v10, v6
 ; CHECK-NEXT:    vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg3.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i32 %vl)
@@ -340,8 +712,30 @@
 define <vscale x 16 x i8> @test_vlseg4_nxv16i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT:    vlseg4e8.v v6, (a0)
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg4.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i32 %vl)
@@ -352,12 +746,34 @@
 define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv16i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    add a2, sp, a2
+; CHECK-NEXT:    addi a2, a2, 16
+; CHECK-NEXT:    vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT:    vlseg4e8.v v6, (a0)
 ; CHECK-NEXT:    vmv2r.v v8, v6
 ; CHECK-NEXT:    vmv2r.v v10, v6
 ; CHECK-NEXT:    vmv2r.v v12, v6
 ; CHECK-NEXT:    vlseg4e8.v v6, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg4.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i32 %vl)
@@ -373,8 +789,22 @@
 define <vscale x 2 x i32> @test_vlseg2_nxv2i32(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; CHECK-NEXT:    vlseg2e32.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> undef, ptr %base, i32 %vl)
@@ -385,10 +815,24 @@
 define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+;
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg2e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32( undef, undef, ptr %base, i32 %vl) @@ -404,8 +848,22 @@ define @test_vlseg3_nxv2i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i32( undef, undef, undef, ptr %base, i32 %vl) @@ -416,11 +874,25 @@ define @test_vlseg3_mask_nxv2i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg3e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i32( undef, undef, undef, ptr %base, i32 %vl) @@ -436,8 +908,22 @@ define @test_vlseg4_nxv2i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg4e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -448,12 +934,26 @@ define @test_vlseg4_mask_nxv2i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg4e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -469,8 +969,22 @@ define @test_vlseg5_nxv2i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -481,6 +995,14 @@ define @test_vlseg5_mask_nxv2i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg5e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -488,6 +1010,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -503,8 +1031,22 @@ define @test_vlseg6_nxv2i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -515,6 +1057,14 @@ define @test_vlseg6_mask_nxv2i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg6e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -523,6 +1073,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -538,8 +1094,22 @@ define @test_vlseg7_nxv2i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -550,6 +1120,14 @@ define @test_vlseg7_mask_nxv2i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg7e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -559,6 +1137,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: 
vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -574,8 +1158,22 @@ define @test_vlseg8_nxv2i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -586,6 +1184,14 @@ define @test_vlseg8_mask_nxv2i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg8e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -596,6 +1202,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -611,8 +1223,22 @@ define @test_vlseg2_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i16( undef, undef, ptr %base, i32 %vl) @@ -623,10 +1249,24 @@ define @test_vlseg2_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; 
CHECK-LABEL: test_vlseg2_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg2e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i16( undef, undef, ptr %base, i32 %vl) @@ -642,8 +1282,22 @@ define @test_vlseg3_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i16( undef, undef, undef, ptr %base, i32 %vl) @@ -654,11 +1308,25 @@ define @test_vlseg3_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg3e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i16( undef, undef, undef, ptr %base, i32 %vl) @@ -674,8 +1342,22 @@ define @test_vlseg4_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -686,12 +1368,26 @@ define @test_vlseg4_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg4e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -707,8 +1403,22 @@ define @test_vlseg5_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -719,6 +1429,14 @@ define @test_vlseg5_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -726,6 +1444,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -741,8 +1465,22 @@ define @test_vlseg6_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg6_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -753,6 +1491,14 @@ define @test_vlseg6_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -761,6 +1507,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -776,8 +1528,22 @@ define @test_vlseg7_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -788,6 +1554,14 @@ define @test_vlseg7_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; 
CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -797,6 +1571,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -812,8 +1592,22 @@ define @test_vlseg8_nxv4i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -824,6 +1618,14 @@ define @test_vlseg8_mask_nxv4i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -834,6 +1636,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -849,8 +1657,22 @@ define @test_vlseg2_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32( 
undef, undef, ptr %base, i32 %vl) @@ -861,10 +1683,24 @@ define @test_vlseg2_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg2e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32( undef, undef, ptr %base, i32 %vl) @@ -880,8 +1716,22 @@ define @test_vlseg3_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i32( undef, undef, undef, ptr %base, i32 %vl) @@ -892,11 +1742,25 @@ define @test_vlseg3_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg3e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i32( undef, undef, undef, ptr %base, i32 %vl) @@ -912,8 +1776,22 @@ define @test_vlseg4_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; 
CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg4e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -924,12 +1802,26 @@ define @test_vlseg4_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg4e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -945,8 +1837,22 @@ define @test_vlseg5_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -957,6 +1863,14 @@ define @test_vlseg5_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg5e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -964,6 +1878,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i32( undef, undef, undef, undef, undef, ptr 
%base, i32 %vl) @@ -979,8 +1899,22 @@ define @test_vlseg6_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -991,6 +1925,14 @@ define @test_vlseg6_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg6e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -999,6 +1941,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1014,8 +1962,22 @@ define @test_vlseg7_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1026,6 +1988,14 @@ define @test_vlseg7_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; 
CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg7e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1035,6 +2005,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1050,8 +2026,22 @@ define @test_vlseg8_nxv1i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1062,6 +2052,14 @@ define @test_vlseg8_mask_nxv1i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg8e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1072,6 +2070,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1087,8 +2091,30 @@ define @test_vlseg2_nxv8i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i16( undef, undef, ptr %base, i32 %vl) @@ -1099,10 +2125,32 @@ define @test_vlseg2_mask_nxv8i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg2e16.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i16( undef, undef, ptr %base, i32 %vl) @@ -1118,8 +2166,30 @@ define @test_vlseg3_nxv8i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i16( undef, undef, undef, ptr %base, i32 %vl) @@ -1130,11 +2200,33 @@ define @test_vlseg3_mask_nxv8i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: 
vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg3e16.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i16( undef, undef, undef, ptr %base, i32 %vl) @@ -1150,8 +2242,30 @@ define @test_vlseg4_nxv8i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1162,12 +2276,34 @@ define @test_vlseg4_mask_nxv8i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg4e16.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1183,8 +2319,22 @@ define @test_vlseg2_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i8: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i8( undef, undef, ptr %base, i32 %vl) @@ -1195,10 +2345,24 @@ define @test_vlseg2_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg2e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i8( undef, undef, ptr %base, i32 %vl) @@ -1214,8 +2378,22 @@ define @test_vlseg3_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg3e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i8( undef, undef, undef, ptr %base, i32 %vl) @@ -1226,11 +2404,25 @@ define @test_vlseg3_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg3e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i8( undef, undef, undef, ptr %base, i32 %vl) @@ -1246,8 +2438,22 @@ define @test_vlseg4_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg4e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1258,12 +2464,26 @@ define @test_vlseg4_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg4e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1279,8 +2499,22 @@ define @test_vlseg5_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1291,6 +2525,14 @@ define @test_vlseg5_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg5e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1298,6 +2540,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1313,8 +2561,22 @@ define @test_vlseg6_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1325,6 +2587,14 @@ define @test_vlseg6_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg6e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1333,6 +2603,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1348,8 +2624,22 @@ define @test_vlseg7_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1360,6 +2650,14 @@ define @test_vlseg7_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg7e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1369,6 +2667,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1384,8 +2688,22 @@ define @test_vlseg8_nxv8i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1396,6 +2714,14 @@ define @test_vlseg8_mask_nxv8i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg8e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1406,6 +2732,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1421,8 +2753,52 @@ define @test_vlseg2_nxv8i32(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg2_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vlseg2e32.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32( undef, undef, ptr %base, i32 %vl) @@ -1433,10 +2809,54 @@ define @test_vlseg2_mask_nxv8i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vlseg2e32.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32( undef, undef, ptr %base, i32 %vl) @@ -1452,8 +2872,22 @@ define @test_vlseg2_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i8( undef, undef, ptr %base, i32 %vl) @@ -1464,10 +2898,24 @@ define @test_vlseg2_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg2e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i8( undef, undef, ptr %base, i32 %vl) @@ -1483,8 +2931,22 @@ define @test_vlseg3_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg3e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i8( undef, undef, undef, ptr %base, i32 %vl) @@ -1495,11 +2957,25 @@ define @test_vlseg3_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg3e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i8( undef, undef, undef, ptr %base, i32 %vl) @@ -1515,8 +2991,22 @@ define @test_vlseg4_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg4e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1527,12 +3017,26 @@ define @test_vlseg4_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg4e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1548,8 +3052,22 @@ define @test_vlseg5_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1560,6 +3078,14 @@ define @test_vlseg5_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg5e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1567,6 +3093,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1582,8 +3114,22 @@ define @test_vlseg6_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1594,6 +3140,14 @@ define @test_vlseg6_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg6e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1602,6 +3156,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1617,8 +3177,22 @@ define @test_vlseg7_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4i8: ; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1629,6 +3203,14 @@ define @test_vlseg7_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg7e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1638,6 +3220,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1653,8 +3241,22 @@ define @test_vlseg8_nxv4i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1665,6 +3267,14 @@ define @test_vlseg8_mask_nxv4i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg8e8.v v7, (a0) ; 
CHECK-NEXT: vmv1r.v v8, v7 @@ -1675,6 +3285,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1690,8 +3306,22 @@ define @test_vlseg2_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i16( undef, undef, ptr %base, i32 %vl) @@ -1702,10 +3332,24 @@ define @test_vlseg2_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg2e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1i16( undef, undef, ptr %base, i32 %vl) @@ -1721,8 +3365,22 @@ define @test_vlseg3_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i16( undef, undef, undef, ptr %base, i32 %vl) @@ -1733,11 +3391,25 @@ define @test_vlseg3_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg3e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i16( undef, undef, undef, ptr %base, i32 %vl) @@ -1753,8 +3425,22 @@ define @test_vlseg4_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1765,12 +3451,26 @@ define @test_vlseg4_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg4e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1786,8 +3486,22 @@ define @test_vlseg5_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi 
a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1798,6 +3512,14 @@ define @test_vlseg5_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1805,6 +3527,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1820,8 +3548,22 @@ define @test_vlseg6_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1832,6 +3574,14 @@ define @test_vlseg6_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1840,6 +3590,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1855,8 +3611,22 @@ 
define @test_vlseg7_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1867,6 +3637,14 @@ define @test_vlseg7_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1876,6 +3654,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1891,8 +3675,22 @@ define @test_vlseg8_nxv1i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1903,6 +3701,14 @@ define @test_vlseg8_mask_nxv1i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: 
vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1913,6 +3719,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -1928,8 +3740,52 @@ define @test_vlseg2_nxv32i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vlseg2e8.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv32i8( undef, undef, ptr %base, i32 %vl) @@ -1940,10 +3796,54 @@ define @test_vlseg2_mask_nxv32i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, 
a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vlseg2e8.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv32i8( undef, undef, ptr %base, i32 %vl) @@ -1959,8 +3859,22 @@ define @test_vlseg2_nxv2i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i8( undef, undef, ptr %base, i32 %vl) @@ -1971,10 +3885,24 @@ define @test_vlseg2_mask_nxv2i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vlseg2e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i8( undef, undef, ptr %base, i32 %vl) @@ -1990,8 +3918,22 @@ define @test_vlseg3_nxv2i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb 
+; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg3e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i8( undef, undef, undef, ptr %base, i32 %vl) @@ -2002,11 +3944,25 @@ define @test_vlseg3_mask_nxv2i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vlseg3e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i8( undef, undef, undef, ptr %base, i32 %vl) @@ -2022,8 +3978,22 @@ define @test_vlseg4_nxv2i8(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg4e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i8( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2034,12 +4004,26 @@ define @test_vlseg4_mask_nxv2i8(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vlseg4e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i8( undef, undef, undef, undef, ptr %base, 
i32 %vl)
@@ -2055,8 +4039,22 @@ define @test_vlseg5_nxv2i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vlseg5e8.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2067,6 +4065,14 @@ define @test_vlseg5_mask_nxv2i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vlseg5e8.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -2074,6 +4080,12 @@
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2089,8 +4101,22 @@ define @test_vlseg6_nxv2i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vlseg6e8.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2101,6 +4127,14 @@ define @test_vlseg6_mask_nxv2i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vlseg6e8.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -2109,6 +4143,12 @@
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2124,8 +4164,22 @@ define @test_vlseg7_nxv2i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vlseg7e8.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2136,6 +4190,14 @@ define @test_vlseg7_mask_nxv2i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vlseg7e8.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -2145,6 +4207,12 @@
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2160,8 +4228,22 @@ define @test_vlseg8_nxv2i8(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
 ; CHECK-NEXT:    vlseg8e8.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -2172,6 +4254,14 @@ define @test_vlseg8_mask_nxv2i8(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT:    vlseg8e8.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -2182,6 +4272,12 @@
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vmv1r.v v14, v7
 ; CHECK-NEXT:    vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -2197,8 +4293,22 @@ define @test_vlseg2_nxv2i16(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT:    vlseg2e16.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i16( undef, undef, ptr %base, i32 %vl)
@@ -2209,10 +4319,24 @@ define @test_vlseg2_mask_nxv2i16(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT:    vlseg2e16.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i16( undef, undef, ptr %base, i32 %vl)
@@ -2228,8 +4352,22 @@ define @test_vlseg3_nxv2i16(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT:    vlseg3e16.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i16( undef, undef, undef, ptr %base, i32 %vl)
@@ -2240,11 +4378,25 @@ define @test_vlseg3_mask_nxv2i16(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT:    vlseg3e16.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i16( undef, undef, undef, ptr %base, i32 %vl)
@@ -2260,8 +4412,22 @@ define @test_vlseg4_nxv2i16(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT:    vlseg4e16.v v7, (a0)
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i16( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2272,12 +4438,26 @@ define @test_vlseg4_mask_nxv2i16(ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a2, vlenb
+; CHECK-NEXT:    slli a2, a2, 1
+; CHECK-NEXT:    sub sp, sp, a2
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT:    vlseg4e16.v v7, (a0)
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2293,8 +4473,22 @@ define @test_vlseg5_nxv2i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2305,6 +4499,14 @@ define @test_vlseg5_mask_nxv2i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2312,6 +4514,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2327,8 +4535,22 @@ define @test_vlseg6_nxv2i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2339,6 +4561,14 @@ define @test_vlseg6_mask_nxv2i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 
# sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2347,6 +4577,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2362,8 +4598,22 @@ define @test_vlseg7_nxv2i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2374,6 +4624,14 @@ define @test_vlseg7_mask_nxv2i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2383,6 +4641,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2398,8 +4662,22 @@ define @test_vlseg8_nxv2i16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -2410,6 +4688,14 @@ define @test_vlseg8_mask_nxv2i16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2420,6 +4706,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -2435,8 +4727,30 @@ define @test_vlseg2_nxv4i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg2e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i32( undef, undef, ptr %base, i32 %vl) @@ -2447,10 +4761,32 @@ define @test_vlseg2_mask_nxv4i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg2e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v 
v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4i32( undef, undef, ptr %base, i32 %vl) @@ -2466,8 +4802,30 @@ define @test_vlseg3_nxv4i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg3e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i32( undef, undef, undef, ptr %base, i32 %vl) @@ -2478,11 +4836,33 @@ define @test_vlseg3_mask_nxv4i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg3e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i32( undef, undef, undef, ptr %base, i32 %vl) @@ -2498,8 +4878,30 @@ define @test_vlseg4_nxv4i32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 
16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg4e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2510,12 +4912,34 @@ define @test_vlseg4_mask_nxv4i32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg4e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2531,8 +4955,52 @@ define @test_vlseg2_nxv16f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv16f16( undef, undef, ptr %base, i32 %vl) @@ -2543,10 +5011,54 @@ define @test_vlseg2_mask_nxv16f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vlseg2e16.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv16f16( undef, undef, ptr %base, i32 %vl) @@ -2562,8 +5074,52 @@ define @test_vlseg2_nxv4f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vlseg2e64.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f64( undef, undef, ptr %base, i32 %vl) @@ -2574,10 +5130,54 @@ define @test_vlseg2_mask_nxv4f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vlseg2e64.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f64( undef, undef, ptr %base, i32 %vl) @@ -2593,8 +5193,22 @@ define @test_vlseg2_nxv1f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * 
vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg2e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f64( undef, undef, ptr %base, i32 %vl) @@ -2605,10 +5219,24 @@ define @test_vlseg2_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg2e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f64( undef, undef, ptr %base, i32 %vl) @@ -2624,8 +5252,22 @@ define @test_vlseg3_nxv1f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg3e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f64( undef, undef, undef, ptr %base, i32 %vl) @@ -2636,11 +5278,25 @@ define @test_vlseg3_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg3e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f64( undef, undef, undef, ptr %base, i32 %vl) @@ -2656,8 +5312,22 @@ define @test_vlseg4_nxv1f64(ptr %base, 
i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg4e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2668,12 +5338,26 @@ define @test_vlseg4_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg4e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2689,8 +5373,22 @@ define @test_vlseg5_nxv1f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2701,6 +5399,14 @@ define @test_vlseg5_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg5e64.v v7, (a0) ; CHECK-NEXT: 
vmv1r.v v8, v7 @@ -2708,6 +5414,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2723,8 +5435,22 @@ define @test_vlseg6_nxv1f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2735,6 +5461,14 @@ define @test_vlseg6_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg6e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2743,6 +5477,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2758,8 +5498,22 @@ define @test_vlseg7_nxv1f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg7e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2770,6 +5524,14 @@ 
define @test_vlseg7_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg7e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2779,6 +5541,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2794,8 +5562,22 @@ define @test_vlseg8_nxv1f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -2806,6 +5588,14 @@ define @test_vlseg8_mask_nxv1f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg8e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2816,6 +5606,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -2831,8 +5627,22 @@ define @test_vlseg2_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: 
.cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f32( undef, undef, ptr %base, i32 %vl) @@ -2843,10 +5653,24 @@ define @test_vlseg2_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg2e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f32( undef, undef, ptr %base, i32 %vl) @@ -2862,8 +5686,22 @@ define @test_vlseg3_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f32( undef, undef, undef, ptr %base, i32 %vl) @@ -2874,11 +5712,25 @@ define @test_vlseg3_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg3e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} 
@llvm.riscv.vlseg3.nxv2f32( undef, undef, undef, ptr %base, i32 %vl) @@ -2894,8 +5746,22 @@ define @test_vlseg4_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg4e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2906,12 +5772,26 @@ define @test_vlseg4_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg4e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2927,8 +5807,22 @@ define @test_vlseg5_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2939,6 +5833,14 @@ define @test_vlseg5_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, 
(a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg5e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2946,6 +5848,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2961,8 +5869,22 @@ define @test_vlseg6_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2973,6 +5895,14 @@ define @test_vlseg6_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg6e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2981,6 +5911,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2996,8 +5932,22 @@ define @test_vlseg7_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call 
{,,,,,,} @llvm.riscv.vlseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3008,6 +5958,14 @@ define @test_vlseg7_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg7e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3017,6 +5975,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3032,8 +5996,22 @@ define @test_vlseg8_nxv2f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3044,6 +6022,14 @@ define @test_vlseg8_mask_nxv2f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg8e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3054,6 +6040,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3069,8 +6061,22 @@ define @test_vlseg2_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f16( undef, undef, ptr %base, i32 %vl) @@ -3081,10 +6087,24 @@ define @test_vlseg2_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg2e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f16( undef, undef, ptr %base, i32 %vl) @@ -3100,8 +6120,22 @@ define @test_vlseg3_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3112,11 +6146,25 @@ define @test_vlseg3_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg3e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3132,8 +6180,22 @@ define @test_vlseg4_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3144,12 +6206,26 @@ define @test_vlseg4_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg4e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3165,8 +6241,22 @@ define @test_vlseg5_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3177,6 +6267,14 @@ define @test_vlseg5_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3184,6 +6282,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3199,8 +6303,22 @@ define @test_vlseg6_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3211,6 +6329,14 @@ define @test_vlseg6_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3219,6 +6345,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3234,8 +6366,22 @@ define @test_vlseg7_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb 
+; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3246,6 +6392,14 @@ define @test_vlseg7_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3255,6 +6409,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3270,8 +6430,22 @@ define @test_vlseg8_nxv1f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3282,6 +6456,14 @@ define @test_vlseg8_mask_nxv1f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3292,6 +6474,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3307,8 +6495,22 @@ define @test_vlseg2_nxv1f32(ptr 
%base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f32( undef, undef, ptr %base, i32 %vl) @@ -3319,10 +6521,24 @@ define @test_vlseg2_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg2e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f32( undef, undef, ptr %base, i32 %vl) @@ -3338,8 +6554,22 @@ define @test_vlseg3_nxv1f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f32( undef, undef, undef, ptr %base, i32 %vl) @@ -3350,11 +6580,25 @@ define @test_vlseg3_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg3e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f32( undef, undef, undef, ptr %base, i32 %vl) @@ -3370,8 +6614,22 @@ define @test_vlseg4_nxv1f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg4e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3382,12 +6640,26 @@ define @test_vlseg4_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg4e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3403,8 +6675,22 @@ define @test_vlseg5_nxv1f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3415,6 +6701,14 @@ define @test_vlseg5_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, 
vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg5e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3422,6 +6716,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3437,8 +6737,22 @@ define @test_vlseg6_nxv1f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3449,6 +6763,14 @@ define @test_vlseg6_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg6e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3457,6 +6779,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3472,8 +6800,22 @@ define @test_vlseg7_nxv1f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: 
vlseg7e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3484,6 +6826,14 @@ define @test_vlseg7_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg7e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3493,6 +6843,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3508,8 +6864,22 @@ define @test_vlseg8_nxv1f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3520,6 +6890,14 @@ define @test_vlseg8_mask_nxv1f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg8e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3530,6 +6908,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} 
@llvm.riscv.vlseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3545,8 +6929,30 @@ define @test_vlseg2_nxv8f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8f16( undef, undef, ptr %base, i32 %vl) @@ -3557,10 +6963,32 @@ define @test_vlseg2_mask_nxv8f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg2e16.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8f16( undef, undef, ptr %base, i32 %vl) @@ -3576,8 +7004,30 @@ define @test_vlseg3_nxv8f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload 
+; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3588,11 +7038,33 @@ define @test_vlseg3_mask_nxv8f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg3e16.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3608,8 +7080,30 @@ define @test_vlseg4_nxv8f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3620,12 +7114,34 @@ define @test_vlseg4_mask_nxv8f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg4e16.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3641,8 +7157,52 @@ define @test_vlseg2_nxv8f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vlseg2e32.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8f32( undef, undef, ptr %base, i32 %vl) @@ -3653,10 +7213,54 @@ define @test_vlseg2_mask_nxv8f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vlseg2e32.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8f32( undef, undef, ptr %base, i32 %vl) @@ -3672,8 +7276,30 @@ define @test_vlseg2_nxv2f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg2e64.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f64( undef, undef, ptr %base, i32 %vl) @@ -3684,10 +7310,32 @@ define @test_vlseg2_mask_nxv2f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg2e64.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi 
a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f64( undef, undef, ptr %base, i32 %vl) @@ -3703,8 +7351,30 @@ define @test_vlseg3_nxv2f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg3e64.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f64( undef, undef, undef, ptr %base, i32 %vl) @@ -3715,11 +7385,33 @@ define @test_vlseg3_mask_nxv2f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg3e64.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f64( undef, undef, undef, ptr %base, i32 %vl) @@ -3735,8 +7427,30 @@ define @test_vlseg4_nxv2f64(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded 
Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg4e64.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3747,12 +7461,34 @@ define @test_vlseg4_mask_nxv2f64(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg4e64.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3768,8 +7504,22 @@ define @test_vlseg2_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f16( undef, undef, ptr %base, i32 %vl) @@ -3780,10 +7530,24 @@ define @test_vlseg2_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg2e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; 
CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f16( undef, undef, ptr %base, i32 %vl) @@ -3799,8 +7563,22 @@ define @test_vlseg3_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3811,11 +7589,25 @@ define @test_vlseg3_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg3e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3831,8 +7623,22 @@ define @test_vlseg4_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3843,12 +7649,26 @@ define @test_vlseg4_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli 
a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg4e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3864,8 +7684,22 @@ define @test_vlseg5_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3876,6 +7710,14 @@ define @test_vlseg5_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3883,6 +7725,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3898,8 +7746,22 @@ define @test_vlseg6_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3910,6 +7772,14 @@ define @test_vlseg6_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3918,6 +7788,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3933,8 +7809,22 @@ define @test_vlseg7_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3945,6 +7835,14 @@ define @test_vlseg7_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3954,6 +7852,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3969,8 +7873,22 @@ define 
@test_vlseg8_nxv4f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3981,6 +7899,14 @@ define @test_vlseg8_mask_nxv4f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3991,6 +7917,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -4006,8 +7938,22 @@ define @test_vlseg2_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f16( undef, undef, ptr %base, i32 %vl) @@ -4018,10 +7964,24 @@ define @test_vlseg2_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded 
Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg2e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f16( undef, undef, ptr %base, i32 %vl) @@ -4037,8 +7997,22 @@ define @test_vlseg3_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f16( undef, undef, undef, ptr %base, i32 %vl) @@ -4049,11 +8023,25 @@ define @test_vlseg3_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg3e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f16( undef, undef, undef, ptr %base, i32 %vl) @@ -4069,8 +8057,22 @@ define @test_vlseg4_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4081,12 +8083,26 @@ define @test_vlseg4_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f16: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg4e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4102,8 +8118,22 @@ define @test_vlseg5_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4114,6 +8144,14 @@ define @test_vlseg5_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4121,6 +8159,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4136,8 +8180,22 @@ define @test_vlseg6_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: 
vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4148,6 +8206,14 @@ define @test_vlseg6_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4156,6 +8222,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4171,8 +8243,22 @@ define @test_vlseg7_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4183,6 +8269,14 @@ define @test_vlseg7_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4192,6 +8286,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} 
@llvm.riscv.vlseg7.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4207,8 +8307,22 @@ define @test_vlseg8_nxv2f16(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -4219,6 +8333,14 @@ define @test_vlseg8_mask_nxv2f16(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4229,6 +8351,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -4244,8 +8372,30 @@ define @test_vlseg2_nxv4f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg2e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f32( undef, undef, ptr %base, i32 %vl) @@ -4256,10 +8406,32 @@ define @test_vlseg2_mask_nxv4f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: 
test_vlseg2_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg2e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f32( undef, undef, ptr %base, i32 %vl) @@ -4275,8 +8447,30 @@ define @test_vlseg3_nxv4f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg3e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f32( undef, undef, undef, ptr %base, i32 %vl) @@ -4287,11 +8481,33 @@ define @test_vlseg3_mask_nxv4f32(ptr %base, i32 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg3e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlseg3.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %vl) @@ -4307,8 +8523,30 @@ define <vscale x 4 x float> @test_vlseg4_nxv4f32(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg4e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlseg4.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %vl) @@ -4319,12 +8557,34 @@ define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg4e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlseg4.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll @@ -8,8 +8,52 @@ define <vscale x 16 x i16> @test_vlseg2_nxv16i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT:
slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, ptr %base, i64 %vl) @@ -20,10 +64,54 @@ define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vlseg2e16.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail
call {<vscale x 16 x i16>,<vscale x 16 x i16>} @llvm.riscv.vlseg2.nxv16i16(<vscale x 16 x i16> undef, <vscale x 16 x i16> undef, ptr %base, i64 %vl) @@ -39,8 +127,30 @@ define <vscale x 4 x i32> @test_vlseg2_nxv4i32(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg2e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl) @@ -51,10 +161,32 @@ define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg2e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl) @@ -70,8 +202,30 @@ define <vscale x 4 x i32> @test_vlseg3_nxv4i32(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg3e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +;
CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl) @@ -82,11 +236,33 @@ define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg3e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl) @@ -102,8 +278,30 @@ define <vscale x 4 x i32> @test_vlseg4_nxv4i32(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg4e32.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg4.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl) @@ -114,12 +312,34 @@ define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) #
Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg4e32.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg4.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, <vscale x 4 x i32> undef, ptr %base, i64 %vl) @@ -135,8 +355,30 @@ define <vscale x 16 x i8> @test_vlseg2_nxv16i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg2e8.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg2.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i64 %vl) @@ -147,10 +389,32 @@ define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlseg2e8.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg2.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i64 %vl) @@ -166,8 +430,30 @@ define <vscale x 16 x i8> @test_vlseg3_nxv16i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +;
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg3e8.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg3.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i64 %vl) @@ -178,11 +464,33 @@ define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlseg3e8.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg3.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i64 %vl) @@ -198,8 +506,30 @@ define <vscale x 16 x i8> @test_vlseg4_nxv16i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg4e8.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg4.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i64 %vl) @@ -210,12 +540,34 @@ define <vscale x 16 x i8>
@test_vlseg4_mask_nxv16i8(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlseg4e8.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e8.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>,<vscale x 16 x i8>} @llvm.riscv.vlseg4.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, <vscale x 16 x i8> undef, ptr %base, i64 %vl) @@ -231,8 +583,22 @@ define <vscale x 1 x i64> @test_vlseg2_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg2e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg2.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -243,10 +609,24 @@ define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg2e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg2.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -262,8 +642,22 @@ define <vscale x 1 x i64> @test_vlseg3_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg3e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg3.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -274,11 +668,25 @@ define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg3e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg3.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -294,8 +702,22 @@ define <vscale x 1 x i64> @test_vlseg4_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg4e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg4.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -306,12 +728,26 @@ define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg4e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0,
1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg4.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -327,8 +763,22 @@ define <vscale x 1 x i64> @test_vlseg5_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg5.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -339,6 +789,14 @@ define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg5e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -346,6 +804,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg5.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -361,8 +825,22 @@ define <vscale x 1 x i64> @test_vlseg6_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg6.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -373,6 +851,14 @@ define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d,
0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg6e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -381,6 +867,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg6.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -396,8 +888,22 @@ define <vscale x 1 x i64> @test_vlseg7_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg7e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg7.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -408,6 +914,14 @@ define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg7e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -417,6 +931,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>,<vscale x 1 x i64>} @llvm.riscv.vlseg7.nxv1i64(<vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, <vscale x 1 x i64> undef, ptr %base, i64 %vl) @@ -432,8 +952,22 @@ define <vscale x 1 x i64> @test_vlseg8_nxv1i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -444,6 +978,14 @@
define @test_vlseg8_mask_nxv1i64(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vlseg8e64.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -454,6 +996,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -469,8 +1017,22 @@
define @test_vlseg2_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32( undef, undef, ptr %base, i64 %vl)
@@ -481,10 +1043,24 @@
define @test_vlseg2_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg2e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1i32( undef, undef, ptr %base, i64 %vl)
@@ -500,8 +1076,22 @@
define @test_vlseg3_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i32( undef, undef, undef, ptr %base, i64 %vl)
@@ -512,11 +1102,25 @@
define @test_vlseg3_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg3e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i32( undef, undef, undef, ptr %base, i64 %vl)
@@ -532,8 +1136,22 @@
define @test_vlseg4_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -544,12 +1162,26 @@
define @test_vlseg4_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg4e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -565,8 +1197,22 @@
define @test_vlseg5_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -577,6 +1223,14 @@
define @test_vlseg5_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg5e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -584,6 +1238,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -599,8 +1259,22 @@
define @test_vlseg6_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -611,6 +1285,14 @@
define @test_vlseg6_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg6e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -619,6 +1301,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -634,8 +1322,22 @@
define @test_vlseg7_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -646,6 +1348,14 @@
define @test_vlseg7_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg7e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -655,6 +1365,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -670,8 +1386,22 @@
define @test_vlseg8_nxv1i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -682,6 +1412,14 @@
define @test_vlseg8_mask_nxv1i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg8e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -692,6 +1430,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -707,8 +1451,30 @@
define @test_vlseg2_nxv8i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv8i16( undef, undef, ptr %base, i64 %vl)
@@ -719,10 +1485,32 @@
define @test_vlseg2_mask_nxv8i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg2e16.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv8i16( undef, undef, ptr %base, i64 %vl)
@@ -738,8 +1526,30 @@
define @test_vlseg3_nxv8i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i16( undef, undef, undef, ptr %base, i64 %vl)
@@ -750,11 +1560,33 @@
define @test_vlseg3_mask_nxv8i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg3e16.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i16( undef, undef, undef, ptr %base, i64 %vl)
@@ -770,8 +1602,30 @@
define @test_vlseg4_nxv8i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -782,12 +1636,34 @@
define @test_vlseg4_mask_nxv8i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg4e16.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vmv2r.v v12, v6
; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -803,8 +1679,22 @@
define @test_vlseg2_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv4i8( undef, undef, ptr %base, i64 %vl)
@@ -815,10 +1705,24 @@
define @test_vlseg2_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg2e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv4i8( undef, undef, ptr %base, i64 %vl)
@@ -834,8 +1738,22 @@
define @test_vlseg3_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i8( undef, undef, undef, ptr %base, i64 %vl)
@@ -846,11 +1764,25 @@
define @test_vlseg3_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg3e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv4i8( undef, undef, undef, ptr %base, i64 %vl)
@@ -866,8 +1798,22 @@
define @test_vlseg4_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -878,12 +1824,26 @@
define @test_vlseg4_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg4e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -899,8 +1859,22 @@
define @test_vlseg5_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -911,6 +1885,14 @@
define @test_vlseg5_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg5e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -918,6 +1900,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -933,8 +1921,22 @@
define @test_vlseg6_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -945,6 +1947,14 @@
define @test_vlseg6_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg6e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -953,6 +1963,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -968,8 +1984,22 @@
define @test_vlseg7_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -980,6 +2010,14 @@
define @test_vlseg7_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg7e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -989,6 +2027,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1004,8 +2048,22 @@
define @test_vlseg8_nxv4i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1016,6 +2074,14 @@
define @test_vlseg8_mask_nxv4i8(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg8e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1026,6 +2092,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1041,8 +2113,22 @@
define @test_vlseg2_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1i16( undef, undef, ptr %base, i64 %vl)
@@ -1053,10 +2139,24 @@
define @test_vlseg2_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg2e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1i16( undef, undef, ptr %base, i64 %vl)
@@ -1072,8 +2172,22 @@
define @test_vlseg3_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i16( undef, undef, undef, ptr %base, i64 %vl)
@@ -1084,11 +2198,25 @@
define @test_vlseg3_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg3e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1i16( undef, undef, undef, ptr %base, i64 %vl)
@@ -1104,8 +2232,22 @@
define @test_vlseg4_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1116,12 +2258,26 @@
define @test_vlseg4_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg4e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1137,8 +2293,22 @@
define @test_vlseg5_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1149,6 +2319,14 @@
define @test_vlseg5_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg5e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1156,6 +2334,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1171,8 +2355,22 @@
define @test_vlseg6_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1183,6 +2381,14 @@
define @test_vlseg6_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg6e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1191,6 +2397,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1206,8 +2418,22 @@
define @test_vlseg7_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1218,6 +2444,14 @@
define @test_vlseg7_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg7e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1227,6 +2461,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1242,8 +2482,22 @@
define @test_vlseg8_nxv1i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1254,6 +2508,14 @@
define @test_vlseg8_mask_nxv1i16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg8e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1264,6 +2526,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1279,8 +2547,22 @@
define @test_vlseg2_nxv2i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32( undef, undef, ptr %base, i64 %vl)
@@ -1291,10 +2573,24 @@
define @test_vlseg2_mask_nxv2i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg2e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv2i32( undef, undef, ptr %base, i64 %vl)
@@ -1310,8 +2606,22 @@
define @test_vlseg3_nxv2i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i32( undef, undef, undef, ptr %base, i64 %vl)
@@ -1322,11 +2632,25 @@
define @test_vlseg3_mask_nxv2i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg3e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i32( undef, undef, undef, ptr %base, i64 %vl)
@@ -1342,8 +2666,22 @@
define @test_vlseg4_nxv2i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1354,12 +2692,26 @@
define @test_vlseg4_mask_nxv2i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg4e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1375,8 +2727,22 @@
define @test_vlseg5_nxv2i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1387,6 +2753,14 @@
define @test_vlseg5_mask_nxv2i32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg5e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1394,6 +2768,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1409,8 +2789,22 @@
define @test_vlseg6_nxv2i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1421,6 +2815,14 @@ define @test_vlseg6_mask_nxv2i32(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg6e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1429,6 +2831,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1444,8 +2852,22 @@ define @test_vlseg7_nxv2i32(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1456,6 +2878,14 @@ define @test_vlseg7_mask_nxv2i32(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg7e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1465,6 +2895,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e32.v v7, (a0), 
v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1480,8 +2916,22 @@ define @test_vlseg8_nxv2i32(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -1492,6 +2942,14 @@ define @test_vlseg8_mask_nxv2i32(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg8e32.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1502,6 +2960,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -1517,8 +2981,22 @@ define @test_vlseg2_nxv8i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg2e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i8( undef, undef, ptr %base, i64 %vl) @@ -1529,10 +3007,24 @@ define @test_vlseg2_mask_nxv8i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i8: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg2e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i8( undef, undef, ptr %base, i64 %vl) @@ -1548,8 +3040,22 @@ define @test_vlseg3_nxv8i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg3e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i8( undef, undef, undef, ptr %base, i64 %vl) @@ -1560,11 +3066,25 @@ define @test_vlseg3_mask_nxv8i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg3e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv8i8( undef, undef, undef, ptr %base, i64 %vl) @@ -1580,8 +3100,22 @@ define @test_vlseg4_nxv8i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg4e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1592,12 +3126,26 @@ define @test_vlseg4_mask_nxv8i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg4e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1613,8 +3161,22 @@ define @test_vlseg5_nxv8i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1625,6 +3187,14 @@ define @test_vlseg5_mask_nxv8i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg5e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1632,6 +3202,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1647,8 +3223,22 @@ define @test_vlseg6_nxv8i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1659,6 +3249,14 @@ define @test_vlseg6_mask_nxv8i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg6e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1667,6 +3265,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1682,8 +3286,22 @@ define @test_vlseg7_nxv8i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma ; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1694,6 +3312,14 @@ define @test_vlseg7_mask_nxv8i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vlseg7e8.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1703,6 +3329,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>} @llvm.riscv.vlseg7.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, ptr %base, i64 %vl)
@@ -1718,8 +3350,22 @@
define <vscale x 8 x i8> @test_vlseg8_nxv8i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>} @llvm.riscv.vlseg8.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef , <vscale x 8 x i8> undef , <vscale x 8 x i8> undef, <vscale x 8 x i8> undef , <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, ptr %base, i64 %vl)
@@ -1730,6 +3376,14 @@
define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT: vlseg8e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1740,6 +3394,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>,<vscale x 8 x i8>} @llvm.riscv.vlseg8.nxv8i8(<vscale x 8 x i8> undef, <vscale x 8 x i8> undef , <vscale x 8 x i8> undef , <vscale x 8 x i8> undef, <vscale x 8 x i8> undef , <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, <vscale x 8 x i8> undef, ptr %base, i64 %vl)
@@ -1755,8 +3415,52 @@
define <vscale x 4 x i64> @test_vlseg2_nxv4i64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i64>,<vscale x 4 x i64>} @llvm.riscv.vlseg2.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, ptr %base, i64 %vl)
@@ -1767,10 +3471,54 @@
define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
; CHECK-NEXT: vlseg2e64.v v4, (a0)
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i64>,<vscale x 4 x i64>} @llvm.riscv.vlseg2.nxv4i64(<vscale x 4 x i64> undef, <vscale x 4 x i64> undef, ptr %base, i64 %vl)
@@ -1786,8 +3534,22 @@
define <vscale x 4 x i16> @test_vlseg2_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg2.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1798,10 +3560,24 @@
define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg2e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg2.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1817,8 +3593,22 @@
define <vscale x 4 x i16> @test_vlseg3_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg3.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1829,11 +3619,25 @@
define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg3e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg3.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1849,8 +3653,22 @@
define <vscale x 4 x i16> @test_vlseg4_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg4.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1861,12 +3679,26 @@
define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg4e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg4.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1882,8 +3714,22 @@
define <vscale x 4 x i16> @test_vlseg5_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg5.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1894,6 +3740,14 @@
define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg5e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1901,6 +3755,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg5.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1916,8 +3776,22 @@
define <vscale x 4 x i16> @test_vlseg6_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg6.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1928,6 +3802,14 @@
define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg6e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1936,6 +3818,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg6.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1951,8 +3839,22 @@
define <vscale x 4 x i16> @test_vlseg7_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg7.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1963,6 +3865,14 @@
define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg7e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1972,6 +3882,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg7.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1987,8 +3903,22 @@
define <vscale x 4 x i16> @test_vlseg8_nxv4i16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg8.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef , <vscale x 4 x i16> undef , <vscale x 4 x i16> undef, <vscale x 4 x i16> undef , <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -1999,6 +3929,14 @@
define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg8e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2009,6 +3947,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>,<vscale x 4 x i16>} @llvm.riscv.vlseg8.nxv4i16(<vscale x 4 x i16> undef, <vscale x 4 x i16> undef , <vscale x 4 x i16> undef , <vscale x 4 x i16> undef, <vscale x 4 x i16> undef , <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, <vscale x 4 x i16> undef, ptr %base, i64 %vl)
@@ -2024,8 +3968,22 @@
define <vscale x 1 x i8> @test_vlseg2_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg2.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2036,10 +3994,24 @@
define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg2e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg2.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2055,8 +4027,22 @@
define <vscale x 1 x i8> @test_vlseg3_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg3.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2067,11 +4053,25 @@
define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg3e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg3.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2087,8 +4087,22 @@
define <vscale x 1 x i8> @test_vlseg4_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg4.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2099,12 +4113,26 @@
define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg4e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg4.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2120,8 +4148,22 @@
define <vscale x 1 x i8> @test_vlseg5_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg5.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2132,6 +4174,14 @@
define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg5e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2139,6 +4189,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg5.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2154,8 +4210,22 @@
define <vscale x 1 x i8> @test_vlseg6_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg6.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2166,6 +4236,14 @@
define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg6e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2174,6 +4252,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg6.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2189,8 +4273,22 @@
define <vscale x 1 x i8> @test_vlseg7_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg7.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2201,6 +4299,14 @@
define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg7e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2210,6 +4316,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg7.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2225,8 +4337,22 @@
define <vscale x 1 x i8> @test_vlseg8_nxv1i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef , <vscale x 1 x i8> undef , <vscale x 1 x i8> undef, <vscale x 1 x i8> undef , <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2237,6 +4363,14 @@
define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg8e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2247,6 +4381,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlseg8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef , <vscale x 1 x i8> undef , <vscale x 1 x i8> undef, <vscale x 1 x i8> undef , <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %vl)
@@ -2262,8 +4402,22 @@
define <vscale x 2 x i8> @test_vlseg2_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg2.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2274,10 +4428,24 @@
define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg2e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg2.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2293,8 +4461,22 @@
define <vscale x 2 x i8> @test_vlseg3_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg3.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2305,11 +4487,25 @@
define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg3e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg3.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2325,8 +4521,22 @@
define <vscale x 2 x i8> @test_vlseg4_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg4.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2337,12 +4547,26 @@
define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg4e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg4.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2358,8 +4582,22 @@
define <vscale x 2 x i8> @test_vlseg5_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg5.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2370,6 +4608,14 @@
define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg5e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2377,6 +4623,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg5.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2392,8 +4644,22 @@
define <vscale x 2 x i8> @test_vlseg6_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg6.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2404,6 +4670,14 @@
define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg6e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2412,6 +4686,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg6.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2427,8 +4707,22 @@
define <vscale x 2 x i8> @test_vlseg7_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg7.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2439,6 +4733,14 @@
define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg7e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2448,6 +4750,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg7.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2463,8 +4771,22 @@
define <vscale x 2 x i8> @test_vlseg8_nxv2i8(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg8.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef , <vscale x 2 x i8> undef , <vscale x 2 x i8> undef, <vscale x 2 x i8> undef , <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2475,6 +4797,14 @@
define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg8e8.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -2485,6 +4815,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlseg8.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef , <vscale x 2 x i8> undef , <vscale x 2 x i8> undef, <vscale x 2 x i8> undef , <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %vl)
@@ -2500,8 +4836,52 @@
define <vscale x 8 x i32> @test_vlseg2_nxv8i32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+;
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vlseg2e32.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32( undef, undef, ptr %base, i64 %vl) @@ -2512,10 +4892,54 @@ define @test_vlseg2_mask_nxv8i32(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vlseg2e32.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv8i32( undef, undef, ptr %base, i64 %vl) @@ -2531,8 +4955,52 @@ define @test_vlseg2_nxv32i8(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vlseg2e8.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv32i8( undef, undef, ptr %base, i64 %vl) @@ -2543,10 +5011,54 @@ define @test_vlseg2_mask_nxv32i8(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vlseg2e8.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; 
CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv32i8( undef, undef, ptr %base, i64 %vl) @@ -2562,8 +5074,22 @@ define @test_vlseg2_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i16( undef, undef, ptr %base, i64 %vl) @@ -2574,10 +5100,24 @@ define @test_vlseg2_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg2e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i16( undef, undef, ptr %base, i64 %vl) @@ -2593,8 +5133,22 @@ define @test_vlseg3_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i16( undef, undef, undef, ptr %base, i64 %vl) @@ -2605,11 +5159,25 @@ define @test_vlseg3_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg3e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i16( undef, undef, undef, ptr %base, i64 %vl) @@ -2625,8 +5193,22 @@ define @test_vlseg4_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i16( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2637,12 +5219,26 @@ define @test_vlseg4_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg4e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i16( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2658,8 +5254,22 @@ define @test_vlseg5_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: 
slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2670,6 +5280,14 @@ define @test_vlseg5_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg5e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2677,6 +5295,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2692,8 +5316,22 @@ define @test_vlseg6_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2704,6 +5342,14 @@ define @test_vlseg6_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg6e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2712,6 +5358,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t 
+; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2727,8 +5379,22 @@ define @test_vlseg7_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2739,6 +5405,14 @@ define @test_vlseg7_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg7e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2748,6 +5422,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2763,8 +5443,22 @@ define @test_vlseg8_nxv2i16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -2775,6 +5469,14 @@ define @test_vlseg8_mask_nxv2i16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i16: 
; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg8e16.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2785,6 +5487,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -2800,8 +5508,30 @@ define @test_vlseg2_nxv2i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg2e64.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i64( undef, undef, ptr %base, i64 %vl) @@ -2812,10 +5542,32 @@ define @test_vlseg2_mask_nxv2i64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg2e64.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv2i64( undef, undef, ptr %base, 
i64 %vl) @@ -2831,8 +5583,30 @@ define @test_vlseg3_nxv2i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg3e64.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i64( undef, undef, undef, ptr %base, i64 %vl) @@ -2843,11 +5617,33 @@ define @test_vlseg3_mask_nxv2i64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg3e64.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2i64( undef, undef, undef, ptr %base, i64 %vl) @@ -2863,8 +5659,30 @@ define @test_vlseg4_nxv2i64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg4e64.v v6, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: 
vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i64( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2875,12 +5693,34 @@ define @test_vlseg4_mask_nxv2i64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg4e64.v v6, (a0) ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2i64( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2896,8 +5736,52 @@ define @test_vlseg2_nxv16f16(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; 
CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv16f16( undef, undef, ptr %base, i64 %vl) @@ -2908,10 +5792,54 @@ define @test_vlseg2_mask_nxv16f16(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vlseg2e16.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv16f16( undef, undef, ptr %base, i64 %vl) @@ -2927,8 +5855,52 @@ define @test_vlseg2_nxv4f64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vlseg2e64.v v4, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v 
v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f64( undef, undef, ptr %base, i64 %vl) @@ -2939,10 +5911,54 @@ define @test_vlseg2_mask_nxv4f64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vlseg2e64.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f64( undef, undef, ptr %base, i64 %vl) @@ -2958,8 +5974,22 @@ define @test_vlseg2_nxv1f64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg2e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f64( undef, undef, ptr %base, i64 %vl) @@ -2970,10 +6000,24 @@ define @test_vlseg2_mask_nxv1f64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg2e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv1f64( undef, undef, ptr %base, i64 %vl) @@ -2989,8 +6033,22 @@ define @test_vlseg3_nxv1f64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg3e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f64( undef, undef, undef, ptr %base, i64 %vl) @@ -3001,11 +6059,25 @@ define @test_vlseg3_mask_nxv1f64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg3e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f64( undef, undef, undef, ptr %base, i64 %vl) @@ -3021,8 +6093,22 @@ define @test_vlseg4_nxv1f64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 
0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg4e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3033,12 +6119,26 @@ define @test_vlseg4_mask_nxv1f64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg4e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3054,8 +6154,22 @@ define @test_vlseg5_nxv1f64(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3066,6 +6180,14 @@ define @test_vlseg5_mask_nxv1f64(ptr %base, i64 %vl, %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg5e64.v v7, (a0) ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3073,6 +6195,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; 
CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3088,8 +6216,22 @@ define @test_vlseg6_nxv1f64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3100,6 +6242,14 @@ define @test_vlseg6_mask_nxv1f64(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vlseg6e64.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3108,6 +6258,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3123,8 +6279,22 @@ define @test_vlseg7_nxv1f64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3135,6 +6305,14 @@ define @test_vlseg7_mask_nxv1f64(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vlseg7e64.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3144,6 +6322,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3159,8 +6343,22 @@ define @test_vlseg8_nxv1f64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3171,6 +6369,14 @@ define @test_vlseg8_mask_nxv1f64(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vlseg8e64.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3181,6 +6387,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3196,8 +6408,22 @@ define @test_vlseg2_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv2f32( undef, undef, ptr %base, i64 %vl)
@@ -3208,10 +6434,24 @@ define @test_vlseg2_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg2e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv2f32( undef, undef, ptr %base, i64 %vl)
@@ -3227,8 +6467,22 @@ define @test_vlseg3_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -3239,11 +6493,25 @@ define @test_vlseg3_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg3e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -3259,8 +6527,22 @@ define @test_vlseg4_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3271,12 +6553,26 @@ define @test_vlseg4_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg4e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3292,8 +6588,22 @@ define @test_vlseg5_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3304,6 +6614,14 @@ define @test_vlseg5_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg5e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3311,6 +6629,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3326,8 +6650,22 @@ define @test_vlseg6_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3338,6 +6676,14 @@ define @test_vlseg6_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg6e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3346,6 +6692,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3361,8 +6713,22 @@ define @test_vlseg7_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3373,6 +6739,14 @@ define @test_vlseg7_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg7e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3382,6 +6756,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3397,8 +6777,22 @@ define @test_vlseg8_nxv2f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3409,6 +6803,14 @@ define @test_vlseg8_mask_nxv2f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg8e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3419,6 +6821,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3434,8 +6842,22 @@ define @test_vlseg2_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1f16( undef, undef, ptr %base, i64 %vl)
@@ -3446,10 +6868,24 @@ define @test_vlseg2_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg2e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1f16( undef, undef, ptr %base, i64 %vl)
@@ -3465,8 +6901,22 @@ define @test_vlseg3_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -3477,11 +6927,25 @@ define @test_vlseg3_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg3e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -3497,8 +6961,22 @@ define @test_vlseg4_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3509,12 +6987,26 @@ define @test_vlseg4_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg4e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3530,8 +7022,22 @@ define @test_vlseg5_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3542,6 +7048,14 @@ define @test_vlseg5_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg5e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3549,6 +7063,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3564,8 +7084,22 @@ define @test_vlseg6_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3576,6 +7110,14 @@ define @test_vlseg6_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg6e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3584,6 +7126,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3599,8 +7147,22 @@ define @test_vlseg7_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3611,6 +7173,14 @@ define @test_vlseg7_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg7e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3620,6 +7190,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3635,8 +7211,22 @@ define @test_vlseg8_nxv1f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3647,6 +7237,14 @@ define @test_vlseg8_mask_nxv1f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg8e16.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3657,6 +7255,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3672,8 +7276,22 @@ define @test_vlseg2_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1f32( undef, undef, ptr %base, i64 %vl)
@@ -3684,10 +7302,24 @@ define @test_vlseg2_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg2e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv1f32( undef, undef, ptr %base, i64 %vl)
@@ -3703,8 +7335,22 @@ define @test_vlseg3_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -3715,11 +7361,25 @@ define @test_vlseg3_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg3e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv1f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -3735,8 +7395,22 @@ define @test_vlseg4_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3747,12 +7421,26 @@ define @test_vlseg4_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg4e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3768,8 +7456,22 @@ define @test_vlseg5_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3780,6 +7482,14 @@ define @test_vlseg5_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg5e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3787,6 +7497,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3802,8 +7518,22 @@ define @test_vlseg6_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3814,6 +7544,14 @@ define @test_vlseg6_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg6e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3822,6 +7560,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3837,8 +7581,22 @@ define @test_vlseg7_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3849,6 +7607,14 @@ define @test_vlseg7_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg7e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3858,6 +7624,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3873,8 +7645,22 @@ define @test_vlseg8_nxv1f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3885,6 +7671,14 @@ define @test_vlseg8_mask_nxv1f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg8e32.v v7, (a0)
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3895,6 +7689,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3910,8 +7710,30 @@ define @test_vlseg2_nxv8f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv8f16( undef, undef, ptr %base, i64 %vl)
@@ -3922,10 +7744,32 @@ define @test_vlseg2_mask_nxv8f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg2e16.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv8f16( undef, undef, ptr %base, i64 %vl)
@@ -3941,8 +7785,30 @@ define @test_vlseg3_nxv8f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv8f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -3953,11 +7819,33 @@ define @test_vlseg3_mask_nxv8f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg3e16.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv8f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -3973,8 +7861,30 @@ define @test_vlseg4_nxv8f16(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3985,12 +7895,34 @@ define @test_vlseg4_mask_nxv8f16(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg4e16.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vmv2r.v v12, v6
; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4006,8 +7938,52 @@ define @test_vlseg2_nxv8f32(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv8f32( undef, undef, ptr %base, i64 %vl)
@@ -4018,10 +7994,54 @@ define @test_vlseg2_mask_nxv8f32(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vlseg2e32.v v4, (a0)
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv8f32( undef, undef, ptr %base, i64 %vl)
@@ -4037,8 +8057,30 @@ define @test_vlseg2_nxv2f64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv2f64( undef, undef, ptr %base, i64 %vl)
@@ -4049,10 +8091,32 @@ define @test_vlseg2_mask_nxv2f64(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; CHECK-NEXT: vlseg2e64.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlseg2.nxv2f64( undef, undef, ptr %base, i64 %vl)
@@ -4068,8 +8132,30 @@ define @test_vlseg3_nxv2f64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f64( undef, undef, undef, ptr %base, i64 %vl)
@@ -4080,11 +8166,33 @@ define @test_vlseg3_mask_nxv2f64(ptr %base, i64 %vl, %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; CHECK-NEXT: vlseg3e64.v v6, (a0)
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f64( undef, undef, undef, ptr %base, i64 %vl)
@@ -4100,8 +8208,30 @@ define @test_vlseg4_nxv2f64(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4112,12 +8242,34 @@
 define @test_vlseg4_mask_nxv2f64(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vlseg4e64.v v6, (a0)
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vmv2r.v v10, v6
 ; CHECK-NEXT: vmv2r.v v12, v6
 ; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4133,8 +8285,22 @@
 define @test_vlseg2_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f16( undef, undef, ptr %base, i64 %vl)
@@ -4145,10 +8311,24 @@
 define @test_vlseg2_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg2e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f16( undef, undef, ptr %base, i64 %vl)
@@ -4164,8 +8344,22 @@
 define @test_vlseg3_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -4176,11 +8370,25 @@
 define @test_vlseg3_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg3e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -4196,8 +8404,22 @@
 define @test_vlseg4_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4208,12 +8430,26 @@
 define @test_vlseg4_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg4e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4229,8 +8465,22 @@
 define @test_vlseg5_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4241,6 +8491,14 @@
 define @test_vlseg5_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg5e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4248,6 +8506,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4263,8 +8527,22 @@
 define @test_vlseg6_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4275,6 +8553,14 @@
 define @test_vlseg6_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg6e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4283,6 +8569,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4298,8 +8590,22 @@
 define @test_vlseg7_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4310,6 +8616,14 @@
 define @test_vlseg7_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg7e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4319,6 +8633,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4334,8 +8654,22 @@
 define @test_vlseg8_nxv4f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -4346,6 +8680,14 @@
 define @test_vlseg8_mask_nxv4f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg8e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4356,6 +8698,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -4371,8 +8719,22 @@
 define @test_vlseg2_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f16( undef, undef, ptr %base, i64 %vl)
@@ -4383,10 +8745,24 @@
 define @test_vlseg2_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg2e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv2f16( undef, undef, ptr %base, i64 %vl)
@@ -4402,8 +8778,22 @@
 define @test_vlseg3_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -4414,11 +8804,25 @@
 define @test_vlseg3_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg3e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv2f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -4434,8 +8838,22 @@
 define @test_vlseg4_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4446,12 +8864,26 @@
 define @test_vlseg4_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg4e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4467,8 +8899,22 @@
 define @test_vlseg5_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4479,6 +8925,14 @@
 define @test_vlseg5_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg5e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4486,6 +8940,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4501,8 +8961,22 @@
 define @test_vlseg6_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4513,6 +8987,14 @@
 define @test_vlseg6_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg6e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4521,6 +9003,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4536,8 +9024,22 @@
 define @test_vlseg7_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4548,6 +9050,14 @@
 define @test_vlseg7_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg7e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4557,6 +9067,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlseg7.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4572,8 +9088,22 @@
 define @test_vlseg8_nxv2f16(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -4584,6 +9114,14 @@
 define @test_vlseg8_mask_nxv2f16(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg8e16.v v7, (a0)
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -4594,6 +9132,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlseg8.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -4609,8 +9153,30 @@
 define @test_vlseg2_nxv4f32(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f32( undef, undef, ptr %base, i64 %vl)
@@ -4621,10 +9187,32 @@
 define @test_vlseg2_mask_nxv4f32(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg2e32.v v6, (a0)
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlseg2.nxv4f32( undef, undef, ptr %base, i64 %vl)
@@ -4640,8 +9228,30 @@
 define @test_vlseg3_nxv4f32(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -4652,11 +9262,33 @@
 define @test_vlseg3_mask_nxv4f32(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg3e32.v v6, (a0)
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vmv2r.v v10, v6
 ; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlseg3.nxv4f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -4672,8 +9304,30 @@
 define @test_vlseg4_nxv4f32(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4684,12 +9338,34 @@
 define @test_vlseg4_mask_nxv4f32(ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg4e32.v v6, (a0)
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vmv2r.v v10, v6
 ; CHECK-NEXT: vmv2r.v v12, v6
 ; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlseg4.nxv4f32( undef, undef, undef, undef, ptr %base, i64 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32-dead.ll
@@ -39,8 +39,52 @@
 define @test_vlseg2ff_dead_vl(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg2ff_dead_vl:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv16i16( undef, undef, ptr %base, i32 %vl)
@@ -51,9 +95,53 @@
 define @test_vlseg2ff_mask_dead_vl( %val, ptr %base, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2ff_mask_dead_vl:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv16i16( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv32.ll
@@ -8,10 +8,54 @@
 define @test_vlseg2ff_nxv16i16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv16i16( undef, undef, ptr %base, i32 %vl)
@@ -24,11 +68,55 @@
 define @test_vlseg2ff_mask_nxv16i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv16i16( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -44,10 +132,24 @@
 define @test_vlseg2ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg2e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv1i8( undef, undef, ptr %base, i32 %vl)
@@ -60,11 +162,25 @@
 define @test_vlseg2ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv1i8( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -80,10 +196,24 @@
 define @test_vlseg3ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg3e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv1i8( undef, undef, undef, ptr %base, i32 %vl)
@@ -96,12 +226,26 @@
 define @test_vlseg3ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv1i8( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -117,10 +261,24 @@
 define @test_vlseg4ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg4e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv1i8( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -133,6 +291,14 @@
 define @test_vlseg4ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -140,6 +306,12 @@
 ; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv1i8( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -155,10 +327,24 @@
 define @test_vlseg5ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg5e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -171,6 +357,14 @@
 define @test_vlseg5ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -179,6 +373,12 @@
 ; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv1i8( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -194,10 +394,24 @@
 define @test_vlseg6ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg6e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv1i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -210,6 +424,14 @@
 define @test_vlseg6ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -219,6 +441,12 @@
 ; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv1i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -234,10 +462,24 @@
 define @test_vlseg7ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg7e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv1i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -250,6 +492,14 @@
 define @test_vlseg7ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -260,6 +510,12 @@
 ; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv1i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -275,10 +531,24 @@
 define @test_vlseg8ff_nxv1i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
 ; CHECK-NEXT: vlseg8e8ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv1i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -291,6 +561,14 @@
 define @test_vlseg8ff_mask_nxv1i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -302,6 +580,12 @@
 ; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv1i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -317,10 +601,32 @@
 define @test_vlseg2ff_nxv16i8(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vlseg2e8ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv16i8( undef, undef, ptr %base, i32 %vl)
@@ -333,11 +639,33 @@
 define @test_vlseg2ff_mask_nxv16i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vlseg2e8ff.v v6, (a0),
v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv16i8( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -353,10 +681,32 @@ define @test_vlseg3ff_nxv16i8(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg3e8ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv16i8( undef, undef, undef, ptr %base, i32 %vl) @@ -369,12 +719,34 @@ define @test_vlseg3ff_mask_nxv16i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlseg3e8ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv16i8( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -390,10 +762,32 @@ define @test_vlseg4ff_nxv16i8(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 
+; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg4e8ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv16i8( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -406,6 +800,18 @@ define @test_vlseg4ff_mask_nxv16i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 @@ -413,6 +819,16 @@ ; CHECK-NEXT: vlseg4e8ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv16i8( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -428,10 +844,24 @@ define @test_vlseg2ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv2i32( undef, undef, ptr %base, i32 %vl) @@ -444,11 +874,25 @@ define @test_vlseg2ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, 
ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv2i32( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -464,10 +908,24 @@ define @test_vlseg3ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg3e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv2i32( undef, undef, undef, ptr %base, i32 %vl) @@ -480,12 +938,26 @@ define @test_vlseg3ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv2i32( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -501,10 +973,24 @@ define @test_vlseg4ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * 
vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg4e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv2i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -517,6 +1003,14 @@ define @test_vlseg4ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -524,6 +1018,12 @@ ; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv2i32( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -539,10 +1039,24 @@ define @test_vlseg5ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg5e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -555,6 +1069,14 @@ define @test_vlseg5ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -563,6 +1085,12 @@ ; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv2i32( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -578,10 +1106,24 @@ define @test_vlseg6ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg6e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -594,6 +1136,14 @@ define @test_vlseg6ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -603,6 +1153,12 @@ ; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv2i32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -618,10 +1174,24 @@ define @test_vlseg7ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg7e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -634,6 +1204,14 @@ define 
@test_vlseg7ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -644,6 +1222,12 @@ ; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv2i32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -659,10 +1243,24 @@ define @test_vlseg8ff_nxv2i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -675,6 +1273,14 @@ define @test_vlseg8ff_mask_nxv2i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -686,6 +1292,12 @@ ; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv2i32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -701,10 +1313,24 @@ define @test_vlseg2ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv4i16( undef, undef, ptr %base, i32 %vl) @@ -717,11 +1343,25 @@ define @test_vlseg2ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv4i16( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -737,10 +1377,24 @@ define @test_vlseg3ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv4i16( undef, undef, undef, ptr %base, i32 %vl) @@ -753,12 +1407,26 @@ define @test_vlseg3ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: 
vlseg3e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv4i16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -774,10 +1442,24 @@ define @test_vlseg4ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv4i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -790,6 +1472,14 @@ define @test_vlseg4ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -797,6 +1487,12 @@ ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv4i16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -812,10 +1508,24 @@ define @test_vlseg5ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg5e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv4i16( undef, 
undef, undef, undef, undef, ptr %base, i32 %vl) @@ -828,6 +1538,14 @@ define @test_vlseg5ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -836,6 +1554,12 @@ ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv4i16( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -851,10 +1575,24 @@ define @test_vlseg6ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg6e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -867,6 +1605,14 @@ define @test_vlseg6ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -876,6 +1622,12 @@ ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv4i16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -891,10 +1643,24 @@ define @test_vlseg7ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg7e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -907,6 +1673,14 @@ define @test_vlseg7ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -917,6 +1691,12 @@ ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv4i16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -932,10 +1712,24 @@ define @test_vlseg8ff_nxv4i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma ; CHECK-NEXT: vlseg8e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -948,6 +1742,14 @@ define @test_vlseg8ff_mask_nxv4i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; 
CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -959,6 +1761,12 @@ ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv4i16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -974,10 +1782,24 @@ define @test_vlseg2ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv1i32( undef, undef, ptr %base, i32 %vl) @@ -990,11 +1812,25 @@ define @test_vlseg2ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv1i32( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -1010,10 +1846,24 @@ define @test_vlseg3ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg3e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv1i32( undef, undef, undef, ptr %base, i32 %vl) @@ -1026,12 +1876,26 @@ define @test_vlseg3ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv1i32( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -1047,10 +1911,24 @@ define @test_vlseg4ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg4e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv1i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1063,6 +1941,14 @@ define @test_vlseg4ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -1070,6 +1956,12 @@ ; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv1i32( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -1085,10 +1977,24 @@ define @test_vlseg5ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; 
CHECK-LABEL: test_vlseg5ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg5e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1101,6 +2007,14 @@ define @test_vlseg5ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -1109,6 +2023,12 @@ ; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv1i32( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -1124,10 +2044,24 @@ define @test_vlseg6ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg6e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1140,6 +2074,14 @@ define @test_vlseg6ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 
0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -1149,6 +2091,12 @@ ; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv1i32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -1164,10 +2112,24 @@ define @test_vlseg7ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg7e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -1180,6 +2142,14 @@ define @test_vlseg7ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -1190,6 +2160,12 @@ ; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv1i32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -1205,10 +2181,24 @@ define @test_vlseg8ff_nxv1i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg8e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; 
CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -1221,6 +2211,14 @@
define @test_vlseg8ff_mask_nxv1i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1232,6 +2230,12 @@
; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv1i32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1247,10 +2251,32 @@
define @test_vlseg2ff_nxv8i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv8i16( undef, undef, ptr %base, i32 %vl)
@@ -1263,11 +2289,33 @@
define @test_vlseg2ff_mask_nxv8i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv8i16( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1283,10 +2331,32 @@
define @test_vlseg3ff_nxv8i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv8i16( undef, undef, undef, ptr %base, i32 %vl)
@@ -1299,12 +2369,34 @@
define @test_vlseg3ff_mask_nxv8i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv8i16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1320,10 +2412,32 @@
define @test_vlseg4ff_nxv8i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv8i16( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1336,6 +2450,18 @@
define @test_vlseg4ff_mask_nxv8i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vmv2r.v v12, v8
@@ -1343,6 +2469,16 @@
; CHECK-NEXT: vlseg4e16ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv8i16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1358,10 +2494,24 @@
define @test_vlseg2ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg2e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv8i8( undef, undef, ptr %base, i32 %vl)
@@ -1374,11 +2524,25 @@
define @test_vlseg2ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv8i8( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1394,10 +2558,24 @@
define @test_vlseg3ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg3e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv8i8( undef, undef, undef, ptr %base, i32 %vl)
@@ -1410,12 +2588,26 @@
define @test_vlseg3ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv8i8( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1431,10 +2623,24 @@
define @test_vlseg4ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv8i8( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1447,6 +2653,14 @@
define @test_vlseg4ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1454,6 +2668,12 @@
; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv8i8( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1469,10 +2689,24 @@
define @test_vlseg5ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg5e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1485,6 +2719,14 @@
define @test_vlseg5ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1493,6 +2735,12 @@
; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv8i8( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1508,10 +2756,24 @@
define @test_vlseg6ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg6e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1524,6 +2786,14 @@
define @test_vlseg6ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1533,6 +2803,12 @@
; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv8i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1548,10 +2824,24 @@
define @test_vlseg7ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg7e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1564,6 +2854,14 @@
define @test_vlseg7ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1574,6 +2872,12 @@
; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv8i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1589,10 +2893,24 @@
define @test_vlseg8ff_nxv8i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg8e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -1605,6 +2923,14 @@
define @test_vlseg8ff_mask_nxv8i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1616,6 +2942,12 @@
; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv8i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1631,10 +2963,54 @@
define @test_vlseg2ff_nxv8i32(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: vlseg2e32ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv8i32( undef, undef, ptr %base, i32 %vl)
@@ -1647,11 +3023,55 @@
define @test_vlseg2ff_mask_nxv8i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vlseg2e32ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv8i32( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1667,10 +3087,24 @@
define @test_vlseg2ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg2e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv4i8( undef, undef, ptr %base, i32 %vl)
@@ -1683,11 +3117,25 @@
define @test_vlseg2ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv4i8( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1703,10 +3151,24 @@
define @test_vlseg3ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg3e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv4i8( undef, undef, undef, ptr %base, i32 %vl)
@@ -1719,12 +3181,26 @@
define @test_vlseg3ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv4i8( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1740,10 +3216,24 @@
define @test_vlseg4ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg4e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv4i8( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1756,6 +3246,14 @@
define @test_vlseg4ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1763,6 +3261,12 @@
; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv4i8( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1778,10 +3282,24 @@
define @test_vlseg5ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg5e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1794,6 +3312,14 @@
define @test_vlseg5ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1802,6 +3328,12 @@
; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv4i8( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1817,10 +3349,24 @@
define @test_vlseg6ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg6e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1833,6 +3379,14 @@
define @test_vlseg6ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1842,6 +3396,12 @@
; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv4i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1857,10 +3417,24 @@
define @test_vlseg7ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg7e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -1873,6 +3447,14 @@
define @test_vlseg7ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1883,6 +3465,12 @@
; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv4i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1898,10 +3486,24 @@
define @test_vlseg8ff_nxv4i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg8e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -1914,6 +3516,14 @@
define @test_vlseg8ff_mask_nxv4i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1925,6 +3535,12 @@
; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv4i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1940,10 +3556,24 @@
define @test_vlseg2ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv1i16( undef, undef, ptr %base, i32 %vl)
@@ -1956,11 +3586,25 @@
define @test_vlseg2ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv1i16( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -1976,10 +3620,24 @@
define @test_vlseg3ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv1i16( undef, undef, undef, ptr %base, i32 %vl)
@@ -1992,12 +3650,26 @@
define @test_vlseg3ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv1i16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2013,10 +3685,24 @@
define @test_vlseg4ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv1i16( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2029,6 +3715,14 @@
define @test_vlseg4ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2036,6 +3730,12 @@
; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv1i16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2051,10 +3751,24 @@
define @test_vlseg5ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2067,6 +3781,14 @@
define @test_vlseg5ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2075,6 +3797,12 @@
; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv1i16( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2090,10 +3818,24 @@
define @test_vlseg6ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2106,6 +3848,14 @@
define @test_vlseg6ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2115,6 +3865,12 @@
; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv1i16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2130,10 +3886,24 @@
define @test_vlseg7ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2146,6 +3916,14 @@
define @test_vlseg7ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2156,6 +3934,12 @@
; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv1i16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2171,10 +3955,24 @@
define @test_vlseg8ff_nxv1i16(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -2187,6 +3985,14 @@
define @test_vlseg8ff_mask_nxv1i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2198,6 +4004,12 @@
; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv1i16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2213,10 +4025,54 @@
define @test_vlseg2ff_nxv32i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vlseg2e8ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv32i8( undef, undef, ptr %base, i32 %vl)
@@ -2229,11 +4085,55 @@
define @test_vlseg2ff_mask_nxv32i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv32i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
; CHECK-NEXT: vlseg2e8ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv32i8( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2249,10 +4149,24 @@
define @test_vlseg2ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg2e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv2i8( undef, undef, ptr %base, i32 %vl)
@@ -2265,11 +4179,25 @@
define @test_vlseg2ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv2i8( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2285,10 +4213,24 @@
define @test_vlseg3ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg3e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv2i8( undef, undef, undef, ptr %base, i32 %vl)
@@ -2301,12 +4243,26 @@
define @test_vlseg3ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv2i8( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2322,10 +4278,24 @@
define @test_vlseg4ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg4e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv2i8( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2338,6 +4308,14 @@
define @test_vlseg4ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2345,6 +4323,12 @@
; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv2i8( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -2360,10 +4344,24 @@
define @test_vlseg5ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
; CHECK-NEXT: vlseg5e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv2i8( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -2376,6 +4374,14 @@
define @test_vlseg5ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv2i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2384,6 +4390,12 @@ ; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv2i8( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2399,10 +4411,24 @@ define @test_vlseg6ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg6e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv2i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2415,6 +4441,14 @@ define @test_vlseg6ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2424,6 +4458,12 @@ ; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv2i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2439,10 +4479,24 @@ define @test_vlseg7ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: 
vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg7e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv2i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2455,6 +4509,14 @@ define @test_vlseg7ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2465,6 +4527,12 @@ ; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv2i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2480,10 +4548,24 @@ define @test_vlseg8ff_nxv2i8(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg8e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv2i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -2496,6 +4578,14 @@ define @test_vlseg8ff_mask_nxv2i8( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2507,6 +4597,12 @@ ; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: 
vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv2i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2522,10 +4618,24 @@ define @test_vlseg2ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv2i16( undef, undef, ptr %base, i32 %vl) @@ -2538,11 +4648,25 @@ define @test_vlseg2ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv2i16( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2558,10 +4682,24 @@ define @test_vlseg3ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv2i16( undef, undef, undef, ptr %base, i32 %vl) @@ -2574,12 +4712,26 @@ define @test_vlseg3ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; 
CHECK-LABEL: test_vlseg3ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv2i16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2595,10 +4747,24 @@ define @test_vlseg4ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv2i16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2611,6 +4777,14 @@ define @test_vlseg4ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2618,6 +4792,12 @@ ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv2i16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2633,10 +4813,24 @@ define @test_vlseg5ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg5e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2649,6 +4843,14 @@ define @test_vlseg5ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2657,6 +4859,12 @@ ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv2i16( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2672,10 +4880,24 @@ define @test_vlseg6ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg6e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2688,6 +4910,14 @@ define @test_vlseg6ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2697,6 +4927,12 @@ ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; 
CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv2i16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2712,10 +4948,24 @@ define @test_vlseg7ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg7e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2728,6 +4978,14 @@ define @test_vlseg7ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2738,6 +4996,12 @@ ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv2i16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2753,10 +5017,24 @@ define @test_vlseg8ff_nxv2i16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg8e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} 
@llvm.riscv.vlseg8ff.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -2769,6 +5047,14 @@ define @test_vlseg8ff_mask_nxv2i16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2780,6 +5066,12 @@ ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv2i16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2795,10 +5087,32 @@ define @test_vlseg2ff_nxv4i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv4i32( undef, undef, ptr %base, i32 %vl) @@ -2811,11 +5125,33 @@ define @test_vlseg2ff_mask_nxv4i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv4i32( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2831,10 +5167,32 @@ define @test_vlseg3ff_nxv4i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg3e32ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv4i32( undef, undef, undef, ptr %base, i32 %vl) @@ -2847,12 +5205,34 @@ define @test_vlseg3ff_mask_nxv4i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg3e32ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv4i32( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2868,10 +5248,32 @@ define @test_vlseg4ff_nxv4i32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg4e32ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv4i32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -2884,6 +5286,18 @@ define @test_vlseg4ff_mask_nxv4i32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 @@ -2891,6 +5305,16 @@ ; CHECK-NEXT: vlseg4e32ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv4i32( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2906,10 +5330,54 @@ define @test_vlseg2ff_nxv16f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v4, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv16f16( undef, undef, ptr %base, i32 %vl) @@ -2922,11 +5390,55 @@ define @test_vlseg2ff_mask_nxv16f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv16f16( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2942,10 +5454,54 @@ define @test_vlseg2ff_nxv4f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; 
CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma ; CHECK-NEXT: vlseg2e64ff.v v4, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv4f64( undef, undef, ptr %base, i32 %vl) @@ -2958,11 +5514,55 @@ define @test_vlseg2ff_mask_nxv4f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vlseg2e64ff.v v4, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv4f64( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -2978,10 +5578,24 @@ define @test_vlseg2ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg2e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv1f64( undef, undef, ptr %base, i32 %vl) @@ -2994,11 +5608,25 @@ define @test_vlseg2ff_mask_nxv1f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg2e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv1f64( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3014,10 +5642,24 @@ define @test_vlseg3ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg3e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv1f64( undef, undef, undef, ptr %base, i32 %vl) @@ -3030,12 +5672,26 @@ define @test_vlseg3ff_mask_nxv1f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, 
vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg3e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv1f64( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3051,10 +5707,24 @@ define @test_vlseg4ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg4e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv1f64( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3067,6 +5737,14 @@ define @test_vlseg4ff_mask_nxv1f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3074,6 +5752,12 @@ ; CHECK-NEXT: vlseg4e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv1f64( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3089,10 +5773,24 @@ define @test_vlseg5ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli 
zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg5e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3105,6 +5803,14 @@ define @test_vlseg5ff_mask_nxv1f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3113,6 +5819,12 @@ ; CHECK-NEXT: vlseg5e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv1f64( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3128,10 +5840,24 @@ define @test_vlseg6ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg6e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3144,6 +5870,14 @@ define @test_vlseg6ff_mask_nxv1f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3153,6 +5887,12 @@ ; CHECK-NEXT: vlseg6e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 
1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv1f64( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3168,10 +5908,24 @@ define @test_vlseg7ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg7e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3184,6 +5938,14 @@ define @test_vlseg7ff_mask_nxv1f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3194,6 +5956,12 @@ ; CHECK-NEXT: vlseg7e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv1f64( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3209,10 +5977,24 @@ define @test_vlseg8ff_nxv1f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg8e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3225,6 +6007,14 @@ define @test_vlseg8ff_mask_nxv1f64( %val, ptr %base, i32 %vl, 
<vscale x 1 x i1> %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3236,6 +6026,12 @@
; CHECK-NEXT: vlseg8e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 1 x double>,<vscale x 1 x double>,<vscale x 1 x double>,<vscale x 1 x double>,<vscale x 1 x double>,<vscale x 1 x double>,<vscale x 1 x double>,<vscale x 1 x double>, i32} @llvm.riscv.vlseg8ff.mask.nxv1f64(<vscale x 1 x double> %val, <vscale x 1 x double> %val, <vscale x 1 x double> %val, <vscale x 1 x double> %val, <vscale x 1 x double> %val, <vscale x 1 x double> %val, <vscale x 1 x double> %val, <vscale x 1 x double> %val, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1)
@@ -3251,10 +6047,24 @@
define <vscale x 2 x float> @test_vlseg2ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg2ff.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, ptr %base, i32 %vl)
@@ -3267,11 +6077,25 @@
define <vscale x 2 x float> @test_vlseg2ff_mask_nxv2f32(<vscale x 2 x float> %val, ptr %base, i32 %vl, <vscale x 2 x i1> %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg2ff.mask.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1)
@@ -3287,10 +6111,24 @@
define <vscale x 2 x float> @test_vlseg3ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg3ff.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, ptr %base, i32 %vl)
@@ -3303,12 +6141,26 @@
define <vscale x 2 x float> @test_vlseg3ff_mask_nxv2f32(<vscale x 2 x float> %val, ptr %base, i32 %vl, <vscale x 2 x i1> %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg3ff.mask.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1)
@@ -3324,10 +6176,24 @@
define <vscale x 2 x float> @test_vlseg4ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg4ff.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, ptr %base, i32 %vl)
@@ -3340,6 +6206,14 @@
define <vscale x 2 x float> @test_vlseg4ff_mask_nxv2f32(<vscale x 2 x float> %val, ptr %base, i32 %vl, <vscale x 2 x i1> %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3347,6 +6221,12 @@
; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg4ff.mask.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1)
@@ -3362,10 +6242,24 @@
define <vscale x 2 x float> @test_vlseg5ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg5ff.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, ptr %base, i32 %vl)
@@ -3378,6 +6272,14 @@
define <vscale x 2 x float> @test_vlseg5ff_mask_nxv2f32(<vscale x 2 x float> %val, ptr %base, i32 %vl, <vscale x 2 x i1> %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3386,6 +6288,12 @@
; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg5ff.mask.nxv2f32(<vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, <vscale x 2 x float> %val, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1)
@@ -3401,10 +6309,24 @@
define <vscale x 2 x float> @test_vlseg6ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg6e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>,<vscale x 2 x float>, i32} @llvm.riscv.vlseg6ff.nxv2f32(<vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, <vscale x 2 x float> undef, ptr %base, i32 %vl)
@@ -3417,6 +6339,14 @@
define
@test_vlseg6ff_mask_nxv2f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3426,6 +6356,12 @@ ; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv2f32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3441,10 +6377,24 @@ define @test_vlseg7ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg7e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3457,6 +6407,14 @@ define @test_vlseg7ff_mask_nxv2f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3467,6 +6425,12 @@ ; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv2f32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3482,10 +6446,24 @@ define @test_vlseg8ff_nxv2f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, 
vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3498,6 +6476,14 @@ define @test_vlseg8ff_mask_nxv2f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3509,6 +6495,12 @@ ; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv2f32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3524,10 +6516,24 @@ define @test_vlseg2ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv1f16( undef, undef, ptr %base, i32 %vl) @@ -3540,11 +6546,25 @@ define @test_vlseg2ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v 
v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv1f16( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3560,10 +6580,24 @@ define @test_vlseg3ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv1f16( undef, undef, undef, ptr %base, i32 %vl) @@ -3576,12 +6610,26 @@ define @test_vlseg3ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv1f16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3597,10 +6645,24 @@ define @test_vlseg4ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail 
call {,,,, i32} @llvm.riscv.vlseg4ff.nxv1f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3613,6 +6675,14 @@ define @test_vlseg4ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3620,6 +6690,12 @@ ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv1f16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3635,10 +6711,24 @@ define @test_vlseg5ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg5e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3651,6 +6741,14 @@ define @test_vlseg5ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3659,6 +6757,12 @@ ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv1f16( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3674,10 +6778,24 @@ define @test_vlseg6ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg6e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3690,6 +6808,14 @@ define @test_vlseg6ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3699,6 +6825,12 @@ ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv1f16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3714,10 +6846,24 @@ define @test_vlseg7ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg7e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3730,6 +6876,14 @@ define @test_vlseg7ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, 
sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3740,6 +6894,12 @@ ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv1f16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3755,10 +6915,24 @@ define @test_vlseg8ff_nxv1f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg8e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -3771,6 +6945,14 @@ define @test_vlseg8ff_mask_nxv1f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3782,6 +6964,12 @@ ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv1f16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3797,10 +6985,24 @@ define @test_vlseg2ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv1f32( undef, undef, ptr %base, i32 %vl) @@ -3813,11 +7015,25 @@ define @test_vlseg2ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv1f32( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3833,10 +7049,24 @@ define @test_vlseg3ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg3e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv1f32( undef, undef, undef, ptr %base, i32 %vl) @@ -3849,12 +7079,26 @@ define @test_vlseg3ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv1f32( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3870,10 +7114,24 
@@ define @test_vlseg4ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg4e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv1f32( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3886,6 +7144,14 @@ define @test_vlseg4ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3893,6 +7159,12 @@ ; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv1f32( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3908,10 +7180,24 @@ define @test_vlseg5ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg5e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3924,6 +7210,14 @@ define @test_vlseg5ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3932,6 +7226,12 @@ ; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv1f32( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3947,10 +7247,24 @@ define @test_vlseg6ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg6e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -3963,6 +7277,14 @@ define @test_vlseg6ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3972,6 +7294,12 @@ ; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv1f32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -3987,10 +7315,24 @@ define @test_vlseg7ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg7e32ff.v v7, (a0) ; CHECK-NEXT: 
csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4003,6 +7345,14 @@ define @test_vlseg7ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4013,6 +7363,12 @@ ; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv1f32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4028,10 +7384,24 @@ define @test_vlseg8ff_nxv1f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg8e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl) @@ -4044,6 +7414,14 @@ define @test_vlseg8ff_mask_nxv1f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4055,6 +7433,12 @@ ; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, 
sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv1f32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4070,10 +7454,32 @@ define @test_vlseg2ff_nxv8f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv8f16( undef, undef, ptr %base, i32 %vl) @@ -4086,11 +7492,33 @@ define @test_vlseg2ff_mask_nxv8f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv8f16( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4106,10 +7534,32 @@ define @test_vlseg3ff_nxv8f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, 
(a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv8f16( undef, undef, undef, ptr %base, i32 %vl) @@ -4122,12 +7572,34 @@ define @test_vlseg3ff_mask_nxv8f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg3e16ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv8f16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4143,10 +7615,32 @@ define @test_vlseg4ff_nxv8f16(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv8f16( undef, undef, undef, undef, ptr %base, i32 %vl) @@ -4159,6 +7653,18 @@ define @test_vlseg4ff_mask_nxv8f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv8f16: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 @@ -4166,6 +7672,16 @@ ; CHECK-NEXT: vlseg4e16ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv8f16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4181,10 +7697,54 @@ define @test_vlseg2ff_nxv8f32(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v4, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv8f32( undef, undef, ptr %base, i32 %vl) @@ -4197,11 +7757,55 @@ define @test_vlseg2ff_mask_nxv8f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi 
sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v4, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv8f32( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4217,10 +7821,32 @@ define @test_vlseg2ff_nxv2f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg2e64ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv2f64( undef, undef, ptr %base, i32 %vl) @@ -4233,11 +7859,33 @@ define @test_vlseg2ff_mask_nxv2f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; 
CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg2e64ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv2f64( %val, %val, ptr %base, %mask, i32 %vl, i32 1) @@ -4253,10 +7901,32 @@ define @test_vlseg3ff_nxv2f64(ptr %base, i32 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma ; CHECK-NEXT: vlseg3e64ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv2f64( undef, undef, undef, ptr %base, i32 %vl) @@ -4269,12 +7939,34 @@ define @test_vlseg3ff_mask_nxv2f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vlseg3e64ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # 
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv2f64( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4290,10 +7982,32 @@
 define @test_vlseg4ff_nxv2f64(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
 ; CHECK-NEXT: vlseg4e64ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv2f64( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4306,6 +8020,18 @@
 define @test_vlseg4ff_mask_nxv2f64( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv2r.v v12, v8
@@ -4313,6 +8039,16 @@
 ; CHECK-NEXT: vlseg4e64ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv2f64( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4328,10 +8064,24 @@
 define @test_vlseg2ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv4f16( undef, undef, ptr %base, i32 %vl)
@@ -4344,11 +8094,25 @@
 define @test_vlseg2ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv4f16( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4364,10 +8128,24 @@
 define @test_vlseg3ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv4f16( undef, undef, undef, ptr %base, i32 %vl)
@@ -4380,12 +8158,26 @@
 define @test_vlseg3ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv4f16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4401,10 +8193,24 @@
 define @test_vlseg4ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv4f16( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4417,6 +8223,14 @@
 define @test_vlseg4ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4424,6 +8238,12 @@
 ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv4f16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4439,10 +8259,24 @@
 define @test_vlseg5ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4455,6 +8289,14 @@
 define @test_vlseg5ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4463,6 +8305,12 @@
 ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv4f16( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4478,10 +8326,24 @@
 define @test_vlseg6ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4494,6 +8356,14 @@
 define @test_vlseg6ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4503,6 +8373,12 @@
 ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv4f16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4518,10 +8394,24 @@
 define @test_vlseg7ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4534,6 +8424,14 @@
 define @test_vlseg7ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4544,6 +8442,12 @@
 ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv4f16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4559,10 +8463,24 @@
 define @test_vlseg8ff_nxv4f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
 ; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -4575,6 +8493,14 @@
 define @test_vlseg8ff_mask_nxv4f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv4f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4586,6 +8512,12 @@
 ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv4f16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4601,10 +8533,24 @@
 define @test_vlseg2ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv2f16( undef, undef, ptr %base, i32 %vl)
@@ -4617,11 +8563,25 @@
 define @test_vlseg2ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv2f16( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4637,10 +8597,24 @@
 define @test_vlseg3ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv2f16( undef, undef, undef, ptr %base, i32 %vl)
@@ -4653,12 +8627,26 @@
 define @test_vlseg3ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv2f16( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4674,10 +8662,24 @@
 define @test_vlseg4ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv2f16( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4690,6 +8692,14 @@
 define @test_vlseg4ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4697,6 +8707,12 @@
 ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv2f16( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4712,10 +8728,24 @@
 define @test_vlseg5ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4728,6 +8758,14 @@
 define @test_vlseg5ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg5ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4736,6 +8774,12 @@
 ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,, i32} @llvm.riscv.vlseg5ff.mask.nxv2f16( %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4751,10 +8795,24 @@
 define @test_vlseg6ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4767,6 +8825,14 @@
 define @test_vlseg6ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4776,6 +8842,12 @@
 ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,, i32} @llvm.riscv.vlseg6ff.mask.nxv2f16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4791,10 +8863,24 @@
 define @test_vlseg7ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4807,6 +8893,14 @@
 define @test_vlseg7ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4817,6 +8911,12 @@
 ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,, i32} @llvm.riscv.vlseg7ff.mask.nxv2f16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4832,10 +8932,24 @@
 define @test_vlseg8ff_nxv2f16(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
 ; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %vl)
@@ -4848,6 +8962,14 @@
 define @test_vlseg8ff_mask_nxv2f16( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -4859,6 +8981,12 @@
 ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,, i32} @llvm.riscv.vlseg8ff.mask.nxv2f16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4874,10 +9002,32 @@
 define @test_vlseg2ff_nxv4f32(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg2e32ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.nxv4f32( undef, undef, ptr %base, i32 %vl)
@@ -4890,11 +9040,33 @@
 define @test_vlseg2ff_mask_nxv4f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg2e32ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i32} @llvm.riscv.vlseg2ff.mask.nxv4f32( %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4910,10 +9082,32 @@
 define @test_vlseg3ff_nxv4f32(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg3e32ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.nxv4f32( undef, undef, undef, ptr %base, i32 %vl)
@@ -4926,12 +9120,34 @@
 define @test_vlseg3ff_mask_nxv4f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg3e32ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i32} @llvm.riscv.vlseg3ff.mask.nxv4f32( %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
@@ -4947,10 +9163,32 @@
 define @test_vlseg4ff_nxv4f32(ptr %base, i32 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg4e32ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.nxv4f32( undef, undef, undef, undef, ptr %base, i32 %vl)
@@ -4963,6 +9201,18 @@
 define @test_vlseg4ff_mask_nxv4f32( %val, ptr %base, i32 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv2r.v v12, v8
@@ -4970,6 +9220,16 @@
 ; CHECK-NEXT: vlseg4e32ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sw a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i32} @llvm.riscv.vlseg4ff.mask.nxv4f32( %val, %val, %val, %val, ptr %base, %mask, i32 %vl, i32 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64-dead.ll
@@ -39,8 +39,52 @@
 define @test_vlseg2ff_dead_vl(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2ff_dead_vl:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv16i16( undef, undef, ptr %base, i64 %vl)
@@ -51,9 +95,53 @@
 define @test_vlseg2ff_mask_dead_vl( %val, ptr %base, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlseg2ff_mask_dead_vl:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsegff-rv64.ll
@@ -8,10 +8,54 @@
 define @test_vlseg2ff_nxv16i16(ptr %base, i64 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv16i16( undef, undef, ptr %base, i64 %vl)
@@ -24,11 +68,55 @@
 define @test_vlseg2ff_mask_nxv16i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -44,10 +132,32 @@
 define @test_vlseg2ff_nxv4i32(ptr %base, i64 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg2e32ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4i32( undef, undef, ptr %base, i64 %vl)
@@ -60,11 +170,33 @@
 define @test_vlseg2ff_mask_nxv4i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg2e32ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4i32( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -80,10 +212,32 @@
 define @test_vlseg3ff_nxv4i32(ptr %base, i64 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg3e32ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv4i32( undef, undef, undef, ptr %base, i64 %vl)
@@ -96,12 +250,34 @@
 define @test_vlseg3ff_mask_nxv4i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vlseg3e32ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv4i32( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -117,10 +293,32 @@
 define @test_vlseg4ff_nxv4i32(ptr %base, i64 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
 ; CHECK-NEXT: vlseg4e32ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv4i32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -133,6 +331,18 @@
 define @test_vlseg4ff_mask_nxv4i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg4ff_mask_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv2r.v v12, v8
@@ -140,6 +350,16 @@
 ; CHECK-NEXT: vlseg4e32ff.v v6, (a0), v0.t
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv4i32( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -155,10 +375,32 @@
 define @test_vlseg2ff_nxv16i8(ptr %base, i64 %vl, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
 ; CHECK-NEXT: vlseg2e8ff.v v6, (a0)
 ; CHECK-NEXT: csrr a0, vl
 ; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv16i8( undef, undef, ptr %base, i64 %vl)
@@ -171,11 +413,33 @@
 define @test_vlseg2ff_mask_nxv16i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
 ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlseg2e8ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv16i8( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -191,10 +455,32 @@ define @test_vlseg3ff_nxv16i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg3e8ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv16i8( undef, undef, undef, ptr %base, i64 %vl) @@ -207,12 +493,34 @@ define @test_vlseg3ff_mask_nxv16i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vlseg3e8ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv16i8( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -228,10 +536,32 @@ define @test_vlseg4ff_nxv16i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, 
-16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; CHECK-NEXT: vlseg4e8ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv16i8( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -244,6 +574,18 @@ define @test_vlseg4ff_mask_nxv16i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 @@ -251,6 +593,16 @@ ; CHECK-NEXT: vlseg4e8ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv16i8( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -266,10 +618,24 @@ define @test_vlseg2ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg2e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1i64( undef, undef, ptr %base, 
i64 %vl) @@ -282,11 +648,25 @@ define @test_vlseg2ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg2e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1i64( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -302,10 +682,24 @@ define @test_vlseg3ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg3e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1i64( undef, undef, undef, ptr %base, i64 %vl) @@ -318,12 +712,26 @@ define @test_vlseg3ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vlseg3e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1i64( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -339,10 +747,24 @@ define @test_vlseg4ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 
0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg4e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1i64( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -355,6 +777,14 @@ define @test_vlseg4ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -362,6 +792,12 @@ ; CHECK-NEXT: vlseg4e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1i64( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -377,10 +813,24 @@ define @test_vlseg5ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg5e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1i64( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -393,6 +843,14 @@ define @test_vlseg5ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -401,6 +859,12 @@ ; CHECK-NEXT: vlseg5e64ff.v v7, (a0), v0.t ; CHECK-NEXT: 
csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1i64( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -416,10 +880,24 @@ define @test_vlseg6ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg6e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1i64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -432,6 +910,14 @@ define @test_vlseg6ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -441,6 +927,12 @@ ; CHECK-NEXT: vlseg6e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1i64( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -456,10 +948,24 @@ define @test_vlseg7ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg7e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1i64( undef, 
undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -472,6 +978,14 @@ define @test_vlseg7ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -482,6 +996,12 @@ ; CHECK-NEXT: vlseg7e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1i64( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -497,10 +1017,24 @@ define @test_vlseg8ff_nxv1i64(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vlseg8e64ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1i64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -513,6 +1047,14 @@ define @test_vlseg8ff_mask_nxv1i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -524,6 +1066,12 @@ ; CHECK-NEXT: vlseg8e64ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1i64( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -539,10 +1087,24 @@ define @test_vlseg2ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1i32: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1i32( undef, undef, ptr %base, i64 %vl) @@ -555,11 +1117,25 @@ define @test_vlseg2ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1i32( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -575,10 +1151,24 @@ define @test_vlseg3ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg3e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1i32( undef, undef, undef, ptr %base, i64 %vl) @@ -591,12 +1181,26 @@ define @test_vlseg3ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; 
CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1i32( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -612,10 +1216,24 @@ define @test_vlseg4ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg4e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1i32( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -628,6 +1246,14 @@ define @test_vlseg4ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -635,6 +1261,12 @@ ; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1i32( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -650,10 +1282,24 @@ define @test_vlseg5ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg5e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 
16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -666,6 +1312,14 @@ define @test_vlseg5ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -674,6 +1328,12 @@ ; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1i32( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -689,10 +1349,24 @@ define @test_vlseg6ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg6e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -705,6 +1379,14 @@ define @test_vlseg6ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -714,6 +1396,12 @@ ; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1i32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -729,10 +1417,24 @@ define @test_vlseg7ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: 
test_vlseg7ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg7e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -745,6 +1447,14 @@ define @test_vlseg7ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -755,6 +1465,12 @@ ; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1i32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -770,10 +1486,24 @@ define @test_vlseg8ff_nxv1i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg8e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -786,6 +1516,14 @@ define @test_vlseg8ff_mask_nxv1i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -797,6 +1535,12 @@ ; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1i32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -812,10 +1556,32 @@ define @test_vlseg2ff_nxv8i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv8i16( undef, undef, ptr %base, i64 %vl) @@ -828,11 +1594,33 @@ define @test_vlseg2ff_mask_nxv8i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv8i16( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -848,10 +1636,32 @@ define @test_vlseg3ff_nxv8i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv8i16: ; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv8i16( undef, undef, undef, ptr %base, i64 %vl) @@ -864,12 +1674,34 @@ define @test_vlseg3ff_mask_nxv8i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vlseg3e16ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv8i16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -885,10 +1717,32 @@ define @test_vlseg4ff_nxv8i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv8i16( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -901,6 +1755,18 @@ define @test_vlseg4ff_mask_nxv8i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 @@ -908,6 +1774,16 @@ ; CHECK-NEXT: vlseg4e16ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv8i16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -923,10 +1799,24 @@ define @test_vlseg2ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg2e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4i8( undef, undef, ptr %base, i64 %vl) @@ -939,11 +1829,25 @@ define @test_vlseg2ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, 
sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4i8( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -959,10 +1863,24 @@ define @test_vlseg3ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg3e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv4i8( undef, undef, undef, ptr %base, i64 %vl) @@ -975,12 +1893,26 @@ define @test_vlseg3ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv4i8( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -996,10 +1928,24 @@ define @test_vlseg4ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg4e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv4i8( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1012,6 +1958,14 @@ define @test_vlseg4ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { 
; CHECK-LABEL: test_vlseg4ff_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -1019,6 +1973,12 @@ ; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv4i8( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -1034,10 +1994,24 @@ define @test_vlseg5ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; CHECK-NEXT: vlseg5e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -1050,6 +2024,14 @@ define @test_vlseg5ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -1058,6 +2040,12 @@ ; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv4i8( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -1073,10 +2061,24 @@ define @test_vlseg6ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg6e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1089,6 +2091,14 @@ define @test_vlseg6ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1098,6 +2108,12 @@
; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv4i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1113,10 +2129,24 @@ define @test_vlseg7ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg7e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1129,6 +2159,14 @@ define @test_vlseg7ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1139,6 +2177,12 @@
; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv4i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1154,10 +2198,24 @@ define @test_vlseg8ff_nxv4i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
; CHECK-NEXT: vlseg8e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1170,6 +2228,14 @@ define @test_vlseg8ff_mask_nxv4i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1181,6 +2247,12 @@
; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv4i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1196,10 +2268,24 @@ define @test_vlseg2ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1i16( undef, undef, ptr %base, i64 %vl)
@@ -1212,11 +2298,25 @@ define @test_vlseg2ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1i16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1232,10 +2332,24 @@ define @test_vlseg3ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1i16( undef, undef, undef, ptr %base, i64 %vl)
@@ -1248,12 +2362,26 @@ define @test_vlseg3ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1i16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1269,10 +2397,24 @@ define @test_vlseg4ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1i16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1285,6 +2427,14 @@ define @test_vlseg4ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1292,6 +2442,12 @@
; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1i16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1307,10 +2463,24 @@ define @test_vlseg5ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1323,6 +2493,14 @@ define @test_vlseg5ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1331,6 +2509,12 @@
; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1i16( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1346,10 +2530,24 @@ define @test_vlseg6ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1362,6 +2560,14 @@ define @test_vlseg6ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1371,6 +2577,12 @@
; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1i16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1386,10 +2598,24 @@ define @test_vlseg7ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1402,6 +2628,14 @@ define @test_vlseg7ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1412,6 +2646,12 @@
; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1i16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1427,10 +2667,24 @@ define @test_vlseg8ff_nxv1i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1443,6 +2697,14 @@ define @test_vlseg8ff_mask_nxv1i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1454,6 +2716,12 @@
; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1i16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1469,10 +2737,24 @@ define @test_vlseg2ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2i32( undef, undef, ptr %base, i64 %vl)
@@ -1485,11 +2767,25 @@ define @test_vlseg2ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2i32( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1505,10 +2801,24 @@ define @test_vlseg3ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2i32( undef, undef, undef, ptr %base, i64 %vl)
@@ -1521,12 +2831,26 @@ define @test_vlseg3ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2i32( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1542,10 +2866,24 @@ define @test_vlseg4ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2i32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1558,6 +2896,14 @@ define @test_vlseg4ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1565,6 +2911,12 @@
; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2i32( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1580,10 +2932,24 @@ define @test_vlseg5ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1596,6 +2962,14 @@ define @test_vlseg5ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1604,6 +2978,12 @@
; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv2i32( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1619,10 +2999,24 @@ define @test_vlseg6ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg6e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1635,6 +3029,14 @@ define @test_vlseg6ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1644,6 +3046,12 @@
; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv2i32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1659,10 +3067,24 @@ define @test_vlseg7ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg7e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1675,6 +3097,14 @@ define @test_vlseg7ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1685,6 +3115,12 @@
; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv2i32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1700,10 +3136,24 @@ define @test_vlseg8ff_nxv2i32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg8e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1716,6 +3166,14 @@ define @test_vlseg8ff_mask_nxv2i32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv2i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1727,6 +3185,12 @@
; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv2i32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1742,10 +3206,24 @@ define @test_vlseg2ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg2e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv8i8( undef, undef, ptr %base, i64 %vl)
@@ -1758,11 +3236,25 @@ define @test_vlseg2ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv8i8( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1778,10 +3270,24 @@ define @test_vlseg3ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg3e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv8i8( undef, undef, undef, ptr %base, i64 %vl)
@@ -1794,12 +3300,26 @@ define @test_vlseg3ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv8i8( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1815,10 +3335,24 @@ define @test_vlseg4ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg4e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv8i8( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1831,6 +3365,14 @@ define @test_vlseg4ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1838,6 +3380,12 @@
; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv8i8( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1853,10 +3401,24 @@ define @test_vlseg5ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg5e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1869,6 +3431,14 @@ define @test_vlseg5ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1877,6 +3447,12 @@
; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv8i8( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1892,10 +3468,24 @@ define @test_vlseg6ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg6e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1908,6 +3498,14 @@ define @test_vlseg6ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1917,6 +3515,12 @@
; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv8i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1932,10 +3536,24 @@ define @test_vlseg7ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg7e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -1948,6 +3566,14 @@ define @test_vlseg7ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -1958,6 +3584,12 @@
; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv8i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -1973,10 +3605,24 @@ define @test_vlseg8ff_nxv8i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
; CHECK-NEXT: vlseg8e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -1989,6 +3635,14 @@ define @test_vlseg8ff_mask_nxv8i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2000,6 +3654,12 @@
; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv8i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2015,10 +3675,54 @@ define @test_vlseg2ff_nxv4i64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vlseg2e64ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4i64( undef, undef, ptr %base, i64 %vl)
@@ -2031,11 +3735,55 @@ define @test_vlseg2ff_mask_nxv4i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv4i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
; CHECK-NEXT: vlseg2e64ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4i64( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2051,10 +3799,24 @@ define @test_vlseg2ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4i16( undef, undef, ptr %base, i64 %vl)
@@ -2067,11 +3829,25 @@ define @test_vlseg2ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4i16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2087,10 +3863,24 @@ define @test_vlseg3ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv4i16( undef, undef, undef, ptr %base, i64 %vl)
@@ -2103,12 +3893,26 @@ define @test_vlseg3ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv4i16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2124,10 +3928,24 @@ define @test_vlseg4ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv4i16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -2140,6 +3958,14 @@ define @test_vlseg4ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2147,6 +3973,12 @@
; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv4i16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2162,10 +3994,24 @@ define @test_vlseg5ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -2178,6 +4024,14 @@ define @test_vlseg5ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2186,6 +4040,12 @@
; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv4i16( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2201,10 +4061,24 @@ define @test_vlseg6ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -2217,6 +4091,14 @@ define @test_vlseg6ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2226,6 +4108,12 @@
; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv4i16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2241,10 +4129,24 @@ define @test_vlseg7ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -2257,6 +4159,14 @@ define @test_vlseg7ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2267,6 +4177,12 @@
; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv4i16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2282,10 +4198,24 @@ define @test_vlseg8ff_nxv4i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -2298,6 +4228,14 @@ define @test_vlseg8ff_mask_nxv4i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -2309,6 +4247,12 @@
; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv4i16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2324,10 +4268,24 @@ define @test_vlseg2ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg2e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1i8( undef, undef, ptr %base, i64 %vl)
@@ -2340,11 +4298,25 @@ define @test_vlseg2ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1i8( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -2360,10 +4332,24 @@ define @test_vlseg3ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
; CHECK-NEXT: vlseg3e8ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1i8( undef, undef, undef, ptr %base, i64 %vl)
@@ -2376,12 +4362,26 @@ define @test_vlseg3ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv1i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
;
CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1i8( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2397,10 +4397,24 @@ define @test_vlseg4ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vlseg4e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1i8( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2413,6 +4427,14 @@ define @test_vlseg4ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2420,6 +4442,12 @@ ; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1i8( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2435,10 +4463,24 @@ define @test_vlseg5ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vlseg5e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2451,6 
+4493,14 @@ define @test_vlseg5ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2459,6 +4509,12 @@ ; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1i8( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2474,10 +4530,24 @@ define @test_vlseg6ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vlseg6e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2490,6 +4560,14 @@ define @test_vlseg6ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2499,6 +4577,12 @@ ; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2514,10 +4598,24 @@ define @test_vlseg7ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: 
slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vlseg7e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2530,6 +4628,14 @@ define @test_vlseg7ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2540,6 +4646,12 @@ ; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2555,10 +4667,24 @@ define @test_vlseg8ff_nxv1i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma ; CHECK-NEXT: vlseg8e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -2571,6 +4697,14 @@ define @test_vlseg8ff_mask_nxv1i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v 
v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2582,6 +4716,12 @@ ; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2597,10 +4737,24 @@ define @test_vlseg2ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg2e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2i8( undef, undef, ptr %base, i64 %vl) @@ -2613,11 +4767,25 @@ define @test_vlseg2ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vlseg2e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2i8( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2633,10 +4801,24 @@ define @test_vlseg3ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg3e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret 
entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2i8( undef, undef, undef, ptr %base, i64 %vl) @@ -2649,12 +4831,26 @@ define @test_vlseg3ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vlseg3e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2i8( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2670,10 +4866,24 @@ define @test_vlseg4ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg4e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2i8( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2686,6 +4896,14 @@ define @test_vlseg4ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2693,6 +4911,12 @@ ; CHECK-NEXT: vlseg4e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2i8( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2708,10 +4932,24 @@ define @test_vlseg5ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg5e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv2i8( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2724,6 +4962,14 @@ define @test_vlseg5ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2732,6 +4978,12 @@ ; CHECK-NEXT: vlseg5e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv2i8( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2747,10 +4999,24 @@ define @test_vlseg6ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg6e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv2i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2763,6 +5029,14 @@ define @test_vlseg6ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; 
CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2772,6 +5046,12 @@ ; CHECK-NEXT: vlseg6e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv2i8( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2787,10 +5067,24 @@ define @test_vlseg7ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg7e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv2i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -2803,6 +5097,14 @@ define @test_vlseg7ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2813,6 +5115,12 @@ ; CHECK-NEXT: vlseg7e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv2i8( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2828,10 +5136,24 @@ define @test_vlseg8ff_nxv2i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma ; CHECK-NEXT: vlseg8e8ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb 
+; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv2i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -2844,6 +5166,14 @@ define @test_vlseg8ff_mask_nxv2i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -2855,6 +5185,12 @@ ; CHECK-NEXT: vlseg8e8ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv2i8( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2870,10 +5206,54 @@ define @test_vlseg2ff_nxv8i32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v4, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv8i32( undef, undef, ptr %base, i64 %vl) @@ -2886,11 +5266,55 @@ define @test_vlseg2ff_mask_nxv8i32( 
%val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v4, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv8i32( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2906,10 +5330,54 @@ define @test_vlseg2ff_nxv32i8(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma ; CHECK-NEXT: vlseg2e8ff.v v4, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv32i8( undef, undef, ptr %base, i64 %vl) @@ -2922,11 +5390,55 @@ define @test_vlseg2ff_mask_nxv32i8( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vlseg2e8ff.v v4, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv32i8( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2942,10 +5454,24 @@ define @test_vlseg2ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2i16( undef, undef, ptr %base, i64 %vl) @@ -2958,11 +5484,25 @@ define @test_vlseg2ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2i16( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -2978,10 +5518,24 @@ define @test_vlseg3ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2i16( undef, undef, undef, ptr %base, i64 %vl) @@ -2994,12 +5548,26 @@ define @test_vlseg3ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2i16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3015,10 +5583,24 @@ define @test_vlseg4ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) { ; 
CHECK-LABEL: test_vlseg4ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2i16( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3031,6 +5613,14 @@ define @test_vlseg4ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3038,6 +5628,12 @@ ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2i16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3053,10 +5649,24 @@ define @test_vlseg5ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg5e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3069,6 +5679,14 @@ define @test_vlseg5ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * 
vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3077,6 +5695,12 @@ ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv2i16( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3092,10 +5716,24 @@ define @test_vlseg6ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg6e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3108,6 +5746,14 @@ define @test_vlseg6ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3117,6 +5763,12 @@ ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv2i16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3132,10 +5784,24 @@ define @test_vlseg7ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg7e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3148,6 +5814,14 @@
define @test_vlseg7ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv2i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3158,6 +5832,12 @@
; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv2i16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3173,10 +5853,24 @@
define @test_vlseg8ff_nxv2i16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv2i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3189,6 +5883,14 @@
define @test_vlseg8ff_mask_nxv2i16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv2i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3200,6 +5902,12 @@
; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv2i16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3215,10 +5923,32 @@
define @test_vlseg2ff_nxv2i64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg2e64ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2i64( undef, undef, ptr %base, i64 %vl)
@@ -3231,11 +5961,33 @@
define @test_vlseg2ff_mask_nxv2i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; CHECK-NEXT: vlseg2e64ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2i64( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3251,10 +6003,32 @@
define @test_vlseg3ff_nxv2i64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg3e64ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2i64( undef, undef, undef, ptr %base, i64 %vl)
@@ -3267,12 +6041,34 @@
define @test_vlseg3ff_mask_nxv2i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; CHECK-NEXT: vlseg3e64ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2i64( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3288,10 +6084,32 @@
define @test_vlseg4ff_nxv2i64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg4e64ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2i64( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3304,6 +6122,18 @@
define @test_vlseg4ff_mask_nxv2i64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vmv2r.v v12, v8
@@ -3311,6 +6141,16 @@
; CHECK-NEXT: vlseg4e64ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2i64( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3326,10 +6166,54 @@
define @test_vlseg2ff_nxv16f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv16f16( undef, undef, ptr %base, i64 %vl)
@@ -3342,11 +6226,55 @@
define @test_vlseg2ff_mask_nxv16f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv16f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv16f16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3362,10 +6290,54 @@
define @test_vlseg2ff_nxv4f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
; CHECK-NEXT: vlseg2e64ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4f64( undef, undef, ptr %base, i64 %vl)
@@ -3378,11 +6350,55 @@
define @test_vlseg2ff_mask_nxv4f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv4f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
; CHECK-NEXT: vlseg2e64ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4f64( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3398,10 +6414,24 @@
define @test_vlseg2ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg2e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1f64( undef, undef, ptr %base, i64 %vl)
@@ -3414,11 +6444,25 @@
define @test_vlseg2ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vlseg2e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1f64( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3434,10 +6478,24 @@
define @test_vlseg3ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg3e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1f64( undef, undef, undef, ptr %base, i64 %vl)
@@ -3450,12 +6508,26 @@
define @test_vlseg3ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
; CHECK-NEXT: vlseg3e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1f64( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3471,10 +6543,24 @@
define @test_vlseg4ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg4e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1f64( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3487,6 +6573,14 @@
define @test_vlseg4ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3494,6 +6588,12 @@
; CHECK-NEXT: vlseg4e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1f64( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3509,10 +6609,24 @@
define @test_vlseg5ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg5e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3525,6 +6639,14 @@
define @test_vlseg5ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3533,6 +6655,12 @@
; CHECK-NEXT: vlseg5e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1f64( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3548,10 +6676,24 @@
define @test_vlseg6ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg6e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3564,6 +6706,14 @@
define @test_vlseg6ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3573,6 +6723,12 @@
; CHECK-NEXT: vlseg6e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1f64( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3588,10 +6744,24 @@
define @test_vlseg7ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg7e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3604,6 +6774,14 @@
define @test_vlseg7ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3614,6 +6792,12 @@
; CHECK-NEXT: vlseg7e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1f64( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3629,10 +6813,24 @@
define @test_vlseg8ff_nxv1f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
; CHECK-NEXT: vlseg8e64ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -3645,6 +6843,14 @@
define @test_vlseg8ff_mask_nxv1f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv1f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3656,6 +6862,12 @@
; CHECK-NEXT: vlseg8e64ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1f64( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3671,10 +6883,24 @@
define @test_vlseg2ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg2e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2f32( undef, undef, ptr %base, i64 %vl)
@@ -3687,11 +6913,25 @@
define @test_vlseg2ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2f32( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3707,10 +6947,24 @@
define @test_vlseg3ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg3e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -3723,12 +6977,26 @@
define @test_vlseg3ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2f32( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3744,10 +7012,24 @@
define @test_vlseg4ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg4e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -3760,6 +7042,14 @@
define @test_vlseg4ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -3767,6 +7057,12 @@
; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2f32( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -3782,10 +7078,24 @@
define @test_vlseg5ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv2f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vlseg5e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3798,6 +7108,14 @@ define @test_vlseg5ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3806,6 +7124,12 @@ ; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv2f32( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3821,10 +7145,24 @@ define @test_vlseg6ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg6e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3837,6 +7175,14 @@ define @test_vlseg6ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3846,6 +7192,12 @@ ; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv2f32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3861,10 
+7213,24 @@ define @test_vlseg7ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg7e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -3877,6 +7243,14 @@ define @test_vlseg7ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3887,6 +7261,12 @@ ; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv2f32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3902,10 +7282,24 @@ define @test_vlseg8ff_nxv2f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma ; CHECK-NEXT: vlseg8e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -3918,6 +7312,14 @@ define @test_vlseg8ff_mask_nxv2f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; 
CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3929,6 +7331,12 @@ ; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv2f32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3944,10 +7352,24 @@ define @test_vlseg2ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1f16( undef, undef, ptr %base, i64 %vl) @@ -3960,11 +7382,25 @@ define @test_vlseg2ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1f16( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -3980,10 +7416,24 @@ define @test_vlseg3ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg3e16ff.v v7, 
(a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1f16( undef, undef, undef, ptr %base, i64 %vl) @@ -3996,12 +7446,26 @@ define @test_vlseg3ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1f16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -4017,10 +7481,24 @@ define @test_vlseg4ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg4e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1f16( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -4033,6 +7511,14 @@ define @test_vlseg4ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4040,6 +7526,12 @@ ; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call 
{,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1f16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -4055,10 +7547,24 @@ define @test_vlseg5ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg5e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -4071,6 +7577,14 @@ define @test_vlseg5ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg5ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4079,6 +7593,12 @@ ; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1f16( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -4094,10 +7614,24 @@ define @test_vlseg6ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg6e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -4110,6 +7644,14 @@ define @test_vlseg6ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg6ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4119,6 +7661,12 @@ ; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1f16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -4134,10 +7682,24 @@ define @test_vlseg7ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg7e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -4150,6 +7712,14 @@ define @test_vlseg7ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4160,6 +7730,12 @@ ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1f16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -4175,10 +7751,24 @@ define @test_vlseg8ff_nxv1f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: 
addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma ; CHECK-NEXT: vlseg8e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -4191,6 +7781,14 @@ define @test_vlseg8ff_mask_nxv1f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -4202,6 +7800,12 @@ ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1f16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -4217,10 +7821,24 @@ define @test_vlseg2ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv1f32( undef, undef, ptr %base, i64 %vl) @@ -4233,11 +7851,25 @@ define @test_vlseg2ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv1f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # 
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv1f32( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4253,10 +7885,24 @@
define @test_vlseg3ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg3e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv1f32( undef, undef, undef, ptr %base, i64 %vl)
@@ -4269,12 +7915,26 @@
define @test_vlseg3ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
; CHECK-NEXT: vlseg3e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv1f32( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4290,10 +7950,24 @@
define @test_vlseg4ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg4e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv1f32( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4306,6 +7980,14 @@
define @test_vlseg4ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4313,6 +7995,12 @@
; CHECK-NEXT: vlseg4e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv1f32( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4328,10 +8016,24 @@
define @test_vlseg5ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg5e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4344,6 +8046,14 @@
define @test_vlseg5ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4352,6 +8062,12 @@
; CHECK-NEXT: vlseg5e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv1f32( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4367,10 +8083,24 @@
define @test_vlseg6ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg6e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4383,6 +8113,14 @@
define @test_vlseg6ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4392,6 +8130,12 @@
; CHECK-NEXT: vlseg6e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv1f32( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4407,10 +8151,24 @@
define @test_vlseg7ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg7e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4423,6 +8181,14 @@
define @test_vlseg7ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4433,6 +8199,12 @@
; CHECK-NEXT: vlseg7e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv1f32( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4448,10 +8220,24 @@
define @test_vlseg8ff_nxv1f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; CHECK-NEXT: vlseg8e32ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -4464,6 +8250,14 @@
define @test_vlseg8ff_mask_nxv1f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4475,6 +8269,12 @@
; CHECK-NEXT: vlseg8e32ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv1f32( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4490,10 +8290,32 @@
define @test_vlseg2ff_nxv8f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv8f16( undef, undef, ptr %base, i64 %vl)
@@ -4506,11 +8328,33 @@
define @test_vlseg2ff_mask_nxv8f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv8f16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4526,10 +8370,32 @@
define @test_vlseg3ff_nxv8f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv8f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -4542,12 +8408,34 @@
define @test_vlseg3ff_mask_nxv8f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv8f16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4563,10 +8451,32 @@
define @test_vlseg4ff_nxv8f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv8f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4579,6 +8489,18 @@
define @test_vlseg4ff_mask_nxv8f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vmv2r.v v12, v8
@@ -4586,6 +8508,16 @@
; CHECK-NEXT: vlseg4e16ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv8f16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4601,10 +8533,54 @@
define @test_vlseg2ff_nxv8f32(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; CHECK-NEXT: vlseg2e32ff.v v4, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv8f32( undef, undef, ptr %base, i64 %vl)
@@ -4617,11 +8593,55 @@
define @test_vlseg2ff_mask_nxv8f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv4r.v v4, v8
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
; CHECK-NEXT: vlseg2e32ff.v v4, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv8f32( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4637,10 +8657,32 @@
define @test_vlseg2ff_nxv2f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg2e64ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2f64( undef, undef, ptr %base, i64 %vl)
@@ -4653,11 +8695,33 @@
define @test_vlseg2ff_mask_nxv2f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; CHECK-NEXT: vlseg2e64ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2f64( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4673,10 +8737,32 @@
define @test_vlseg3ff_nxv2f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg3e64ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2f64( undef, undef, undef, ptr %base, i64 %vl)
@@ -4689,12 +8775,34 @@
define @test_vlseg3ff_mask_nxv2f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
; CHECK-NEXT: vlseg3e64ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2f64( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4710,10 +8818,32 @@
define @test_vlseg4ff_nxv2f64(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
; CHECK-NEXT: vlseg4e64ff.v v6, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2f64( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4726,6 +8856,18 @@
define @test_vlseg4ff_mask_nxv2f64( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv2r.v v6, v8
; CHECK-NEXT: vmv2r.v v10, v8
; CHECK-NEXT: vmv2r.v v12, v8
@@ -4733,6 +8875,16 @@
; CHECK-NEXT: vlseg4e64ff.v v6, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2f64( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4748,10 +8900,24 @@
define @test_vlseg2ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4f16( undef, undef, ptr %base, i64 %vl)
@@ -4764,11 +8930,25 @@
define @test_vlseg2ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4f16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4784,10 +8964,24 @@
define @test_vlseg3ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv4f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -4800,12 +8994,26 @@
define @test_vlseg3ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv4f16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4821,10 +9029,24 @@
define @test_vlseg4ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv4f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4837,6 +9059,14 @@
define @test_vlseg4ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4844,6 +9074,12 @@
; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv4f16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4859,10 +9095,24 @@
define @test_vlseg5ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4875,6 +9125,14 @@
define @test_vlseg5ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4883,6 +9141,12 @@
; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv4f16( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4898,10 +9162,24 @@
define @test_vlseg6ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4914,6 +9192,14 @@
define @test_vlseg6ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4923,6 +9209,12 @@
; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv4f16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4938,10 +9230,24 @@
define @test_vlseg7ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -4954,6 +9260,14 @@
define @test_vlseg7ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -4964,6 +9278,12 @@
; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv4f16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -4979,10 +9299,24 @@
define @test_vlseg8ff_nxv4f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
; CHECK-NEXT: vlseg8e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl)
@@ -4995,6 +9329,14 @@
define @test_vlseg8ff_mask_nxv4f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg8ff_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -5006,6 +9348,12 @@
; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv4f16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -5021,10 +9369,24 @@
define @test_vlseg2ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg2e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv2f16( undef, undef, ptr %base, i64 %vl)
@@ -5037,11 +9399,25 @@
define @test_vlseg2ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg2ff_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
; CHECK-NEXT: vlseg2e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv2f16( %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -5057,10 +9433,24 @@
define @test_vlseg3ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg3e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv2f16( undef, undef, undef, ptr %base, i64 %vl)
@@ -5073,12 +9463,26 @@
define @test_vlseg3ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg3ff_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
; CHECK-NEXT: vlseg3e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv2f16( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -5094,10 +9498,24 @@
define @test_vlseg4ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg4e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv2f16( undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -5110,6 +9528,14 @@
define @test_vlseg4ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg4ff_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -5117,6 +9543,12 @@
; CHECK-NEXT: vlseg4e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv2f16( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -5132,10 +9564,24 @@
define @test_vlseg5ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg5e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -5148,6 +9594,14 @@
define @test_vlseg5ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg5ff_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -5156,6 +9610,12 @@
; CHECK-NEXT: vlseg5e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,, i64} @llvm.riscv.vlseg5ff.mask.nxv2f16( %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -5171,10 +9631,24 @@
define @test_vlseg6ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg6e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl)
@@ -5187,6 +9661,14 @@
define @test_vlseg6ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) {
; CHECK-LABEL: test_vlseg6ff_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v7, v8
; CHECK-NEXT: vmv1r.v v9, v8
; CHECK-NEXT: vmv1r.v v10, v8
@@ -5196,6 +9678,12 @@
; CHECK-NEXT: vlseg6e16ff.v v7, (a0), v0.t
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {,,,,,, i64} @llvm.riscv.vlseg6ff.mask.nxv2f16( %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1)
@@ -5211,10 +9699,24 @@
define @test_vlseg7ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) {
; CHECK-LABEL: test_vlseg7ff_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
; CHECK-NEXT: vlseg7e16ff.v v7, (a0)
; CHECK-NEXT: csrr a0, vl
; CHECK-NEXT: sd a0, 0(a2)
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %vl) @@ -5227,6 +9729,14 @@ define @test_vlseg7ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg7ff_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -5237,6 +9747,12 @@ ; CHECK-NEXT: vlseg7e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,, i64} @llvm.riscv.vlseg7ff.mask.nxv2f16( %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -5252,10 +9768,24 @@ define @test_vlseg8ff_nxv2f16(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma ; CHECK-NEXT: vlseg8e16ff.v v7, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %vl) @@ -5268,6 +9798,14 @@ define @test_vlseg8ff_mask_nxv2f16( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg8ff_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -5279,6 +9817,12 @@ ; CHECK-NEXT: vlseg8e16ff.v v7, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,, i64} @llvm.riscv.vlseg8ff.mask.nxv2f16( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -5294,10 +9838,32 @@ define @test_vlseg2ff_nxv4f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg2e32ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv4f32( undef, undef, ptr %base, i64 %vl) @@ -5310,11 +9876,33 @@ define @test_vlseg2ff_mask_nxv4f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg2e32ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv4f32( %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -5330,10 +9918,32 @@ define @test_vlseg3ff_nxv4f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; 
CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg3e32ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.nxv4f32( undef, undef, undef, ptr %base, i64 %vl) @@ -5346,12 +9956,34 @@ define @test_vlseg3ff_mask_nxv4f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg3ff_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vlseg3e32ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,, i64} @llvm.riscv.vlseg3ff.mask.nxv4f32( %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) @@ -5367,10 +9999,32 @@ define @test_vlseg4ff_nxv4f32(ptr %base, i64 %vl, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; CHECK-NEXT: vlseg4e32ff.v v6, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.nxv4f32( undef, undef, undef, undef, ptr %base, i64 %vl) @@ -5383,6 +10037,18 @@ define @test_vlseg4ff_mask_nxv4f32( %val, ptr %base, i64 %vl, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg4ff_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: 
addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 @@ -5390,6 +10056,16 @@ ; CHECK-NEXT: vlseg4e32ff.v v6, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a2) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,, i64} @llvm.riscv.vlseg4ff.mask.nxv4f32( %val, %val, %val, %val, ptr %base, %mask, i64 %vl, i64 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv32.ll @@ -8,8 +8,52 @@ define @test_vlsseg2_nxv16i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -20,10 +64,54 @@ define @test_vlsseg2_mask_nxv16i16(ptr %base, i32 
%offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -39,8 +127,22 @@ define @test_vlsseg2_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -51,10 +153,24 @@ define @test_vlsseg2_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size 
Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -70,8 +186,22 @@ define @test_vlsseg3_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -82,11 +212,25 @@ define @test_vlsseg3_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -102,8 +246,22 @@ define @test_vlsseg4_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -114,12 +272,26 @@ define 
@test_vlsseg4_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -135,8 +307,22 @@ define @test_vlsseg5_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -147,6 +333,14 @@ define @test_vlsseg5_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -154,6 +348,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -169,8 +369,22 @@ define @test_vlsseg6_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -181,6 +395,14 @@ define @test_vlsseg6_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -189,6 +411,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -204,8 +432,22 @@ define @test_vlsseg7_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -216,6 +458,14 @@ define @test_vlsseg7_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -225,6 +475,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; 
CHECK-NEXT: vlsseg7e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -240,8 +496,22 @@ define @test_vlsseg8_nxv1i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -252,6 +522,14 @@ define @test_vlsseg8_mask_nxv1i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -262,6 +540,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -277,8 +561,30 @@ define @test_vlsseg2_nxv16i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vlsseg2e8.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -289,10 +595,32 @@ define @test_vlsseg2_mask_nxv16i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vlsseg2e8.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e8.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -308,8 +636,30 @@ define @test_vlsseg3_nxv16i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vlsseg3e8.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv16i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -320,11 +670,33 @@ define @test_vlsseg3_mask_nxv16i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 
16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vlsseg3e8.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e8.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv16i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -340,8 +712,30 @@ define @test_vlsseg4_nxv16i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vlsseg4e8.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv16i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -352,12 +746,34 @@ define @test_vlsseg4_mask_nxv16i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vlsseg4e8.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e8.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv16i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -373,8 +789,22 @@ define @test_vlsseg2_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: 
test_vlsseg2_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i32( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -385,10 +815,24 @@ define @test_vlsseg2_mask_nxv2i32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i32( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -404,8 +848,22 @@ define @test_vlsseg3_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -416,11 +874,25 @@ define @test_vlsseg3_mask_nxv2i32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; 
CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -436,8 +908,22 @@ define @test_vlsseg4_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -448,12 +934,26 @@ define @test_vlsseg4_mask_nxv2i32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -469,8 +969,22 @@ define @test_vlsseg5_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -481,6 +995,14 @@ define @test_vlsseg5_mask_nxv2i32(ptr 
%base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -488,6 +1010,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -503,8 +1031,22 @@ define @test_vlsseg6_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -515,6 +1057,14 @@ define @test_vlsseg6_mask_nxv2i32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -523,6 +1073,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -538,8 +1094,22 @@ define @test_vlsseg7_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, 
sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -550,6 +1120,14 @@ define @test_vlsseg7_mask_nxv2i32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -559,6 +1137,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -574,8 +1158,22 @@ define @test_vlsseg8_nxv2i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -586,6 +1184,14 @@ define @test_vlsseg8_mask_nxv2i32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ 
-596,6 +1202,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -611,8 +1223,22 @@ define @test_vlsseg2_nxv4i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -623,10 +1249,24 @@ define @test_vlsseg2_mask_nxv4i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -642,8 +1282,22 @@ define @test_vlsseg3_nxv4i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -654,11 +1308,25 @@ define @test_vlsseg3_mask_nxv4i16(ptr %base, i32 
%offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -674,8 +1342,22 @@
define @test_vlsseg4_nxv4i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -686,12 +1368,26 @@
define @test_vlsseg4_mask_nxv4i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -707,8 +1403,22 @@
define @test_vlsseg5_nxv4i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -719,6 +1429,14 @@
define @test_vlsseg5_mask_nxv4i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -726,6 +1444,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -741,8 +1465,22 @@
define @test_vlsseg6_nxv4i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -753,6 +1491,14 @@
define @test_vlsseg6_mask_nxv4i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -761,6 +1507,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -776,8 +1528,22 @@
define @test_vlsseg7_nxv4i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -788,6 +1554,14 @@
define @test_vlsseg7_mask_nxv4i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -797,6 +1571,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -812,8 +1592,22 @@
define @test_vlsseg8_nxv4i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -824,6 +1618,14 @@
define @test_vlsseg8_mask_nxv4i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv4i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -834,6 +1636,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -849,8 +1657,22 @@
define @test_vlsseg2_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -861,10 +1683,24 @@
define @test_vlsseg2_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -880,8 +1716,22 @@
define @test_vlsseg3_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -892,11 +1742,25 @@
define @test_vlsseg3_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -912,8 +1776,22 @@
define @test_vlsseg4_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -924,12 +1802,26 @@
define @test_vlsseg4_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -945,8 +1837,22 @@
define @test_vlsseg5_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -957,6 +1863,14 @@
define @test_vlsseg5_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -964,6 +1878,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -979,8 +1899,22 @@
define @test_vlsseg6_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -991,6 +1925,14 @@
define @test_vlsseg6_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -999,6 +1941,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1014,8 +1962,22 @@
define @test_vlsseg7_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1026,6 +1988,14 @@
define @test_vlsseg7_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1035,6 +2005,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1050,8 +2026,22 @@
define @test_vlsseg8_nxv1i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1062,6 +2052,14 @@
define @test_vlsseg8_mask_nxv1i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv1i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1072,6 +2070,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1087,8 +2091,30 @@
define @test_vlsseg2_nxv8i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1099,10 +2125,32 @@
define @test_vlsseg2_mask_nxv8i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu
; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1118,8 +2166,30 @@
define @test_vlsseg3_nxv8i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1130,11 +2200,33 @@
define @test_vlsseg3_mask_nxv8i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu
; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1150,8 +2242,30 @@
define @test_vlsseg4_nxv8i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1162,12 +2276,34 @@
define @test_vlsseg4_mask_nxv8i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv8i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu
; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vmv2r.v v12, v6
; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1183,8 +2319,22 @@
define @test_vlsseg2_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i8( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1195,10 +2345,24 @@
define @test_vlsseg2_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i8( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1214,8 +2378,22 @@
define @test_vlsseg3_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1226,11 +2404,25 @@
define @test_vlsseg3_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1246,8 +2438,22 @@
define @test_vlsseg4_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1258,12 +2464,26 @@
define @test_vlsseg4_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1279,8 +2499,22 @@
define @test_vlsseg5_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1291,6 +2525,14 @@
define @test_vlsseg5_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1298,6 +2540,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1313,8 +2561,22 @@
define @test_vlsseg6_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1325,6 +2587,14 @@
define @test_vlsseg6_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1333,6 +2603,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1348,8 +2624,22 @@
define @test_vlsseg7_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1360,6 +2650,14 @@
define @test_vlsseg7_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1369,6 +2667,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1384,8 +2688,22 @@
define @test_vlsseg8_nxv8i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma
; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1396,6 +2714,14 @@
define @test_vlsseg8_mask_nxv8i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv8i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu
; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1406,6 +2732,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1421,8 +2753,52 @@
define @test_vlsseg2_nxv8i32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1433,10 +2809,54 @@
define @test_vlsseg2_mask_nxv8i32(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv8i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu
; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1452,8 +2872,22 @@
define @test_vlsseg2_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i8( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1464,10 +2898,24 @@
define @test_vlsseg2_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i8( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1483,8 +2931,22 @@
define @test_vlsseg3_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1495,11 +2957,25 @@
define @test_vlsseg3_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1515,8 +2991,22 @@
define @test_vlsseg4_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1527,12 +3017,26 @@
define @test_vlsseg4_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1548,8 +3052,22 @@
define @test_vlsseg5_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1560,6 +3078,14 @@
define @test_vlsseg5_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1567,6 +3093,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1582,8 +3114,22 @@
define @test_vlsseg6_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1594,6 +3140,14 @@
define @test_vlsseg6_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1602,6 +3156,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1617,8 +3177,22 @@
define @test_vlsseg7_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1629,6 +3203,14 @@
define @test_vlsseg7_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1638,6 +3220,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1653,8 +3241,22 @@
define @test_vlsseg8_nxv4i8(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, ma
; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1665,6 +3267,14 @@
define @test_vlsseg8_mask_nxv4i8(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv4i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e8, mf2, ta, mu
; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -1675,6 +3285,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1690,8 +3306,22 @@
define @test_vlsseg2_nxv1i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1702,10 +3332,24 @@
define @test_vlsseg2_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -1721,8 +3365,22 @@
define @test_vlsseg3_nxv1i16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv1i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+;
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1733,11 +3391,25 @@ define @test_vlsseg3_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1753,8 +3425,22 @@ define @test_vlsseg4_nxv1i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1765,12 +3451,26 @@ define @test_vlsseg4_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1786,8 +3486,22 @@ define @test_vlsseg5_nxv1i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: 
test_vlsseg5_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1798,6 +3512,14 @@ define @test_vlsseg5_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1805,6 +3527,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1820,8 +3548,22 @@ define @test_vlsseg6_nxv1i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1832,6 +3574,14 @@ define @test_vlsseg6_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v 
v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1840,6 +3590,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1855,8 +3611,22 @@ define @test_vlsseg7_nxv1i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1867,6 +3637,14 @@ define @test_vlsseg7_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1876,6 +3654,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1891,8 +3675,22 @@ define @test_vlsseg8_nxv1i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1903,6 +3701,14 @@ define @test_vlsseg8_mask_nxv1i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1913,6 +3719,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1928,8 +3740,52 @@ define @test_vlsseg2_nxv32i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vlsseg2e8.v v4, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv32i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1940,10 +3796,54 @@ define 
@test_vlsseg2_mask_nxv32i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu ; CHECK-NEXT: vlsseg2e8.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e8.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv32i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1959,8 +3859,22 @@ define @test_vlsseg2_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1971,10 +3885,24 @@ define @test_vlsseg2_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; 
CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i8( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -1990,8 +3918,22 @@ define @test_vlsseg3_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2002,11 +3944,25 @@ define @test_vlsseg3_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i8( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2022,8 +3978,22 @@ define @test_vlsseg4_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i8( undef, undef, undef, undef, ptr %base, i32 
%offset, i32 %vl) @@ -2034,12 +4004,26 @@ define @test_vlsseg4_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i8( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2055,8 +4039,22 @@ define @test_vlsseg5_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2067,6 +4065,14 @@ define @test_vlsseg5_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2074,6 +4080,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i8( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2089,8 +4101,22 @@ define @test_vlsseg6_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; 
CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2101,6 +4127,14 @@ define @test_vlsseg6_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2109,6 +4143,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i8( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2124,8 +4164,22 @@ define @test_vlsseg7_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2136,6 +4190,14 @@ define @test_vlsseg7_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2145,6 +4207,12 @@ 
; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2160,8 +4228,22 @@ define @test_vlsseg8_nxv2i8(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2172,6 +4254,14 @@ define @test_vlsseg8_mask_nxv2i8(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2182,6 +4272,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2197,8 +4293,22 @@ define @test_vlsseg2_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} 
@llvm.riscv.vlsseg2.nxv2i16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2209,10 +4319,24 @@ define @test_vlsseg2_mask_nxv2i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2228,8 +4352,22 @@ define @test_vlsseg3_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2240,11 +4378,25 @@ define @test_vlsseg3_mask_nxv2i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2260,8 +4412,22 @@ define @test_vlsseg4_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 
0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2272,12 +4438,26 @@ define @test_vlsseg4_mask_nxv2i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2293,8 +4473,22 @@ define @test_vlsseg5_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2305,6 +4499,14 @@ define @test_vlsseg5_mask_nxv2i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2312,6 +4514,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2327,8 +4535,22 @@ define @test_vlsseg6_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2339,6 +4561,14 @@ define @test_vlsseg6_mask_nxv2i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2347,6 +4577,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2362,8 +4598,22 @@ define @test_vlsseg7_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2374,6 +4624,14 @@ define @test_vlsseg7_mask_nxv2i16(ptr %base, 
i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2383,6 +4641,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2398,8 +4662,22 @@ define @test_vlsseg8_nxv2i16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2410,6 +4688,14 @@ define @test_vlsseg8_mask_nxv2i16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2420,6 +4706,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -2435,8 +4727,30 @@ define @test_vlsseg2_nxv4i32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb 
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2447,10 +4761,32 @@
 define @test_vlsseg2_mask_nxv4i32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu
 ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2466,8 +4802,30 @@
 define @test_vlsseg3_nxv4i32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2478,11 +4836,33 @@
 define @test_vlsseg3_mask_nxv4i32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu
 ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vmv2r.v v10, v6
 ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2498,8 +4878,30 @@
 define @test_vlsseg4_nxv4i32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
 ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2510,12 +4912,34 @@
 define @test_vlsseg4_mask_nxv4i32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu
 ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1
 ; CHECK-NEXT: vmv2r.v v8, v6
 ; CHECK-NEXT: vmv2r.v v10, v6
 ; CHECK-NEXT: vmv2r.v v12, v6
 ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2531,8 +4955,52 @@
 define @test_vlsseg2_nxv16f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16f16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2543,10 +5011,54 @@
 define @test_vlsseg2_mask_nxv16f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv16f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu
 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1
 ; CHECK-NEXT: vmv4r.v v8, v4
 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16f16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2562,8 +5074,52 @@
 define @test_vlsseg2_nxv4f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma
 ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f64( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2574,10 +5130,54 @@
 define @test_vlsseg2_mask_nxv4f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv4f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu
 ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1
 ; CHECK-NEXT: vmv4r.v v8, v4
 ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f64( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2593,8 +5193,22 @@
 define @test_vlsseg2_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f64( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2605,10 +5219,24 @@
 define @test_vlsseg2_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f64( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2624,8 +5252,22 @@
 define @test_vlsseg3_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f64( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2636,11 +5278,25 @@
 define @test_vlsseg3_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f64( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2656,8 +5312,22 @@
 define @test_vlsseg4_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2668,12 +5338,26 @@
 define @test_vlsseg4_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2689,8 +5373,22 @@
 define @test_vlsseg5_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2701,6 +5399,14 @@
 define @test_vlsseg5_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2708,6 +5414,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2723,8 +5435,22 @@
 define @test_vlsseg6_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2735,6 +5461,14 @@
 define @test_vlsseg6_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2743,6 +5477,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2758,8 +5498,22 @@
 define @test_vlsseg7_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2770,6 +5524,14 @@
 define @test_vlsseg7_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2779,6 +5541,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2794,8 +5562,22 @@
 define @test_vlsseg8_nxv1f64(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma
 ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2806,6 +5588,14 @@
 define @test_vlsseg8_mask_nxv1f64(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv1f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu
 ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2816,6 +5606,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2831,8 +5627,22 @@
 define @test_vlsseg2_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2843,10 +5653,24 @@
 define @test_vlsseg2_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2862,8 +5686,22 @@
 define @test_vlsseg3_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2874,11 +5712,25 @@
 define @test_vlsseg3_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2894,8 +5746,22 @@
 define @test_vlsseg4_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2906,12 +5772,26 @@
 define @test_vlsseg4_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2927,8 +5807,22 @@
 define @test_vlsseg5_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2939,6 +5833,14 @@
 define @test_vlsseg5_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2946,6 +5848,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2961,8 +5869,22 @@
 define @test_vlsseg6_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2973,6 +5895,14 @@
 define @test_vlsseg6_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2981,6 +5911,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -2996,8 +5932,22 @@
 define @test_vlsseg7_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3008,6 +5958,14 @@
 define @test_vlsseg7_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3017,6 +5975,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3032,8 +5996,22 @@
 define @test_vlsseg8_nxv2f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3044,6 +6022,14 @@
 define @test_vlsseg8_mask_nxv2f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv2f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3054,6 +6040,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3069,8 +6061,22 @@
 define @test_vlsseg2_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3081,10 +6087,24 @@
 define @test_vlsseg2_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f16( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3100,8 +6120,22 @@
 define @test_vlsseg3_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3112,11 +6146,25 @@
 define @test_vlsseg3_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3132,8 +6180,22 @@
 define @test_vlsseg4_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3144,12 +6206,26 @@
 define @test_vlsseg4_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3165,8 +6241,22 @@
 define @test_vlsseg5_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3177,6 +6267,14 @@
 define @test_vlsseg5_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3184,6 +6282,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3199,8 +6303,22 @@
 define @test_vlsseg6_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3211,6 +6329,14 @@
 define @test_vlsseg6_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3219,6 +6345,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3234,8 +6366,22 @@
 define @test_vlsseg7_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3246,6 +6392,14 @@
 define @test_vlsseg7_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3255,6 +6409,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3270,8 +6430,22 @@
 define @test_vlsseg8_nxv1f16(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3282,6 +6456,14 @@
 define @test_vlsseg8_mask_nxv1f16(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv1f16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3292,6 +6474,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3307,8 +6495,22 @@
 define @test_vlsseg2_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3319,10 +6521,24 @@
 define @test_vlsseg2_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f32( undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3338,8 +6554,22 @@
 define @test_vlsseg3_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3350,11 +6580,25 @@
 define @test_vlsseg3_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f32( undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3370,8 +6614,22 @@
 define @test_vlsseg4_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3382,12 +6640,26 @@
 define @test_vlsseg4_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3403,8 +6675,22 @@
 define @test_vlsseg5_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3415,6 +6701,14 @@
 define @test_vlsseg5_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3422,6 +6716,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3437,8 +6737,22 @@
 define @test_vlsseg6_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3449,6 +6763,14 @@
 define @test_vlsseg6_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3457,6 +6779,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3472,8 +6800,22 @@
 define @test_vlsseg7_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3484,6 +6826,14 @@
 define @test_vlsseg7_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -3493,6 +6843,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3508,8 +6864,22 @@
 define @test_vlsseg8_nxv1f32(ptr %base, i32 %offset, i32 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl)
@@ -3520,6 +6890,14 @@
 define @test_vlsseg8_mask_nxv1f32(ptr %base, i32 %offset, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv1f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3,
a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3530,6 +6908,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3545,8 +6929,30 @@ define @test_vlsseg2_nxv8f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma ; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3557,10 +6963,32 @@ define @test_vlsseg2_mask_nxv8f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu ; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3576,8 +7004,30 @@ define 
@test_vlsseg3_nxv8f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma ; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3588,11 +7038,33 @@ define @test_vlsseg3_mask_nxv8f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu ; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3608,8 +7080,30 @@ define @test_vlsseg4_nxv8f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma ; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3620,12 +7114,34 @@ define @test_vlsseg4_mask_nxv8f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv8f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu ; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3641,8 +7157,52 @@ define @test_vlsseg2_nxv8f32(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f32( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3653,10 +7213,54 @@ define @test_vlsseg2_mask_nxv8f32(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv8f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu ; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f32( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3672,8 +7276,30 @@ define @test_vlsseg2_nxv2f64(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f64( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3684,10 +7310,32 @@ define @test_vlsseg2_mask_nxv2f64(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f64( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3703,8 +7351,30 @@ define @test_vlsseg3_nxv2f64(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f64( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3715,11 +7385,33 @@ define @test_vlsseg3_mask_nxv2f64(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1 ; CHECK-NEXT: 
vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f64( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3735,8 +7427,30 @@ define @test_vlsseg4_nxv2f64(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3747,12 +7461,34 @@ define @test_vlsseg4_mask_nxv2f64(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3768,8 +7504,22 @@ define @test_vlsseg2_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: 
slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3780,10 +7530,24 @@ define @test_vlsseg2_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3799,8 +7563,22 @@ define @test_vlsseg3_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3811,11 +7589,25 @@ define @test_vlsseg3_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3831,8 +7623,22 @@ define @test_vlsseg4_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3843,12 +7649,26 @@ define @test_vlsseg4_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3864,8 +7684,22 @@ define @test_vlsseg5_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3876,6 +7710,14 @@ define @test_vlsseg5_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3883,6 +7725,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3898,8 +7746,22 @@ define @test_vlsseg6_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3910,6 +7772,14 @@ define @test_vlsseg6_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3918,6 +7788,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3933,8 +7809,22 @@ define @test_vlsseg7_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; 
CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3945,6 +7835,14 @@ define @test_vlsseg7_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3954,6 +7852,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3969,8 +7873,22 @@ define @test_vlsseg8_nxv4f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -3981,6 +7899,14 @@ define @test_vlsseg8_mask_nxv4f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3991,6 +7917,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi 
a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4006,8 +7938,22 @@ define @test_vlsseg2_nxv2f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4018,10 +7964,24 @@ define @test_vlsseg2_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f16( undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4037,8 +7997,22 @@ define @test_vlsseg3_nxv2f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4049,11 +8023,25 @@ define @test_vlsseg3_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f16( undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4069,8 +8057,22 @@ define @test_vlsseg4_nxv2f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4081,12 +8083,26 @@ define @test_vlsseg4_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4102,8 +8118,22 @@ define @test_vlsseg5_nxv2f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli 
zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4114,6 +8144,14 @@ define @test_vlsseg5_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4121,6 +8159,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4136,8 +8180,22 @@ define @test_vlsseg6_nxv2f16(ptr %base, i32 %offset, i32 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i32 %offset, i32 %vl) @@ -4148,6 +8206,14 @@ define @test_vlsseg6_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4156,6 +8222,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, 
a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>} @llvm.riscv.vlsseg6.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4171,8 +8243,22 @@
define <vscale x 2 x half> @test_vlsseg7_nxv2f16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>} @llvm.riscv.vlsseg7.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4183,6 +8269,14 @@
define <vscale x 2 x half> @test_vlsseg7_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -4192,6 +8286,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>} @llvm.riscv.vlsseg7.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4207,8 +8307,22 @@
define <vscale x 2 x half> @test_vlsseg8_nxv2f16(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>} @llvm.riscv.vlsseg8.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4219,6 +8333,14 @@
define <vscale x 2 x half> @test_vlsseg8_mask_nxv2f16(ptr %base, i32 %offset, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv2f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -4229,6 +8351,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>,<vscale x 2 x half>} @llvm.riscv.vlsseg8.nxv2f16(<vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, <vscale x 2 x half> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4244,8 +8372,30 @@
define <vscale x 4 x float> @test_vlsseg2_nxv4f32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlsseg2.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4256,10 +8406,32 @@
define <vscale x 4 x float> @test_vlsseg2_mask_nxv4f32(ptr %base, i32 %offset, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu
; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlsseg2.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4275,8 +8447,30 @@
define <vscale x 4 x float> @test_vlsseg3_nxv4f32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlsseg3.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4287,11 +8481,33 @@
define <vscale x 4 x float> @test_vlsseg3_mask_nxv4f32(ptr %base, i32 %offset, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu
; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlsseg3.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4307,8 +8523,30 @@
define <vscale x 4 x float> @test_vlsseg4_nxv4f32(ptr %base, i32 %offset, i32 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlsseg4.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %offset, i32 %vl)
@@ -4319,12 +8557,34 @@
define <vscale x 4 x float> @test_vlsseg4_mask_nxv4f32(ptr %base, i32 %offset, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv4f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu
; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vmv2r.v v12, v6
; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %0 = tail call {<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>} @llvm.riscv.vlsseg4.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, <vscale x 4 x float> undef, ptr %base, i32 %offset, i32 %vl)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlsseg-rv64.ll
@@ -8,8 +8,52 @@
define <vscale x 16 x i16> @test_vlsseg2_nxv16i16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv16i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma
; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp,
a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -20,10 +64,54 @@ define @test_vlsseg2_mask_nxv16i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -39,8 +127,30 @@ define @test_vlsseg2_nxv4i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -51,10 +161,32 @@ define @test_vlsseg2_mask_nxv4i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -70,8 +202,30 @@ define @test_vlsseg3_nxv4i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -82,11 +236,33 @@ define @test_vlsseg3_mask_nxv4i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, 
sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -102,8 +278,30 @@ define @test_vlsseg4_nxv4i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -114,12 +312,34 @@ define @test_vlsseg4_mask_nxv4i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i32( undef, undef, undef, undef, ptr %base, i64 
%offset, i64 %vl) @@ -135,8 +355,30 @@ define @test_vlsseg2_nxv16i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vlsseg2e8.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i8( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -147,10 +389,32 @@ define @test_vlsseg2_mask_nxv16i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vlsseg2e8.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e8.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i8( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -166,8 +430,30 @@ define @test_vlsseg3_nxv16i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vlsseg3e8.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv16i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -178,11 +464,33 @@ define @test_vlsseg3_mask_nxv16i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vlsseg3e8.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e8.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv16i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -198,8 +506,30 @@ define @test_vlsseg4_nxv16i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; CHECK-NEXT: vlsseg4e8.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv16i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -210,12 +540,34 @@ define @test_vlsseg4_mask_nxv16i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; 
CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, mu ; CHECK-NEXT: vlsseg4e8.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e8.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv16i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -231,8 +583,22 @@ define @test_vlsseg2_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -243,10 +609,24 @@ define @test_vlsseg2_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -262,8 +642,22 @@ define @test_vlsseg3_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -274,11 +668,25 @@ define @test_vlsseg3_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -294,8 +702,22 @@ define @test_vlsseg4_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -306,12 +728,26 @@ define @test_vlsseg4_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -327,8 +763,22 @@ define @test_vlsseg5_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; 
CHECK-LABEL: test_vlsseg5_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i64( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -339,6 +789,14 @@ define @test_vlsseg5_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -346,6 +804,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i64( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -361,8 +825,22 @@ define @test_vlsseg6_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -373,6 +851,14 @@ define @test_vlsseg6_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v 
v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -381,6 +867,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -396,8 +888,22 @@ define @test_vlsseg7_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -408,6 +914,14 @@ define @test_vlsseg7_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -417,6 +931,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -432,8 +952,22 @@ define @test_vlsseg8_nxv1i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -444,6 +978,14 @@ define @test_vlsseg8_mask_nxv1i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -454,6 +996,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -469,8 +1017,22 @@ define @test_vlsseg2_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -481,10 +1043,24 @@ define @test_vlsseg2_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -500,8 +1076,22 @@ define @test_vlsseg3_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; 
CHECK-LABEL: test_vlsseg3_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -512,11 +1102,25 @@ define @test_vlsseg3_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -532,8 +1136,22 @@ define @test_vlsseg4_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -544,12 +1162,26 @@ define @test_vlsseg4_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; 
CHECK-NEXT: vlsseg4e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -565,8 +1197,22 @@ define @test_vlsseg5_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -577,6 +1223,14 @@ define @test_vlsseg5_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -584,6 +1238,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -599,8 +1259,22 @@ define @test_vlsseg6_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} 
@llvm.riscv.vlsseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -611,6 +1285,14 @@ define @test_vlsseg6_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -619,6 +1301,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -634,8 +1322,22 @@ define @test_vlsseg7_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -646,6 +1348,14 @@ define @test_vlsseg7_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -655,6 +1365,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -670,8 +1386,22 @@ define @test_vlsseg8_nxv1i32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: 
test_vlsseg8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -682,6 +1412,14 @@ define @test_vlsseg8_mask_nxv1i32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -692,6 +1430,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -707,8 +1451,30 @@ define @test_vlsseg2_nxv8i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma ; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -719,10 +1485,32 @@ define @test_vlsseg2_mask_nxv8i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu ; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -738,8 +1526,30 @@ define @test_vlsseg3_nxv8i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma ; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -750,11 +1560,33 @@ define @test_vlsseg3_mask_nxv8i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu ; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded 
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -770,8 +1602,30 @@
 define @test_vlsseg4_nxv8i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    add a3, sp, a3
+; CHECK-NEXT:    addi a3, a3, 16
+; CHECK-NEXT:    vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, ma
 ; CHECK-NEXT:    vlsseg4e16.v v6, (a0), a1
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -782,12 +1636,34 @@
 define @test_vlsseg4_mask_nxv8i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv8i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    add a3, sp, a3
+; CHECK-NEXT:    addi a3, a3, 16
+; CHECK-NEXT:    vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, m2, ta, mu
 ; CHECK-NEXT:    vlsseg4e16.v v6, (a0), a1
 ; CHECK-NEXT:    vmv2r.v v8, v6
 ; CHECK-NEXT:    vmv2r.v v10, v6
 ; CHECK-NEXT:    vmv2r.v v12, v6
 ; CHECK-NEXT:    vlsseg4e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    add a0, sp, a0
+; CHECK-NEXT:    addi a0, a0, 16
+; CHECK-NEXT:    vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -803,8 +1679,22 @@
 define @test_vlsseg2_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg2e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i8( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -815,10 +1705,24 @@
 define @test_vlsseg2_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg2e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vlsseg2e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i8( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -834,8 +1738,22 @@
 define @test_vlsseg3_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg3e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -846,11 +1764,25 @@
 define @test_vlsseg3_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg3e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vlsseg3e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -866,8 +1798,22 @@
 define @test_vlsseg4_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg4e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -878,12 +1824,26 @@
 define @test_vlsseg4_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg4e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vlsseg4e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -899,8 +1859,22 @@
 define @test_vlsseg5_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg5e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -911,6 +1885,14 @@
 define @test_vlsseg5_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg5e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -918,6 +1900,12 @@
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vlsseg5e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i8( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -933,8 +1921,22 @@
 define @test_vlsseg6_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg6e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -945,6 +1947,14 @@
 define @test_vlsseg6_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg6e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -953,6 +1963,12 @@
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vlsseg6e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -968,8 +1984,22 @@
 define @test_vlsseg7_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg7e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -980,6 +2010,14 @@
 define @test_vlsseg7_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg7e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -989,6 +2027,12 @@
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vlsseg7e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1004,8 +2048,22 @@
 define @test_vlsseg8_nxv4i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, ma
 ; CHECK-NEXT:    vlsseg8e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1016,6 +2074,14 @@
 define @test_vlsseg8_mask_nxv4i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv4i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, mf2, ta, mu
 ; CHECK-NEXT:    vlsseg8e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1026,6 +2092,12 @@
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vmv1r.v v14, v7
 ; CHECK-NEXT:    vlsseg8e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1041,8 +2113,22 @@
 define @test_vlsseg2_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg2e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1053,10 +2139,24 @@
 define @test_vlsseg2_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg2e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vlsseg2e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1072,8 +2172,22 @@
 define @test_vlsseg3_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg3e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1084,11 +2198,25 @@
 define @test_vlsseg3_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg3e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vlsseg3e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1104,8 +2232,22 @@
 define @test_vlsseg4_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg4e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1116,12 +2258,26 @@
 define @test_vlsseg4_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg4e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vlsseg4e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1137,8 +2293,22 @@
 define @test_vlsseg5_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg5e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1149,6 +2319,14 @@
 define @test_vlsseg5_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg5e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1156,6 +2334,12 @@
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vlsseg5e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1171,8 +2355,22 @@
 define @test_vlsseg6_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg6e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1183,6 +2381,14 @@
 define @test_vlsseg6_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg6e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1191,6 +2397,12 @@
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vlsseg6e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1206,8 +2418,22 @@
 define @test_vlsseg7_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1218,6 +2444,14 @@
 define @test_vlsseg7_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg7e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1227,6 +2461,12 @@
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vlsseg7e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1242,8 +2482,22 @@
 define @test_vlsseg8_nxv1i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, ma
 ; CHECK-NEXT:    vlsseg8e16.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1254,6 +2508,14 @@
 define @test_vlsseg8_mask_nxv1i16(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv1i16:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e16, mf4, ta, mu
 ; CHECK-NEXT:    vlsseg8e16.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1264,6 +2526,12 @@
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vmv1r.v v14, v7
 ; CHECK-NEXT:    vlsseg8e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1279,8 +2547,22 @@
 define @test_vlsseg2_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg2e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i32( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1291,10 +2573,24 @@
 define @test_vlsseg2_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg2e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vlsseg2e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i32( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1310,8 +2606,22 @@
 define @test_vlsseg3_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg3e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1322,11 +2632,25 @@
 define @test_vlsseg3_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg3e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vlsseg3e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1342,8 +2666,22 @@
 define @test_vlsseg4_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg4e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1354,12 +2692,26 @@
 define @test_vlsseg4_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg4e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vlsseg4e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1375,8 +2727,22 @@
 define @test_vlsseg5_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg5e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1387,6 +2753,14 @@
 define @test_vlsseg5_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg5e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1394,6 +2768,12 @@
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vlsseg5e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2i32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1409,8 +2789,22 @@
 define @test_vlsseg6_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg6e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1421,6 +2815,14 @@
 define @test_vlsseg6_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg6e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1429,6 +2831,12 @@
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vlsseg6e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2i32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1444,8 +2852,22 @@
 define @test_vlsseg7_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg7e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1456,6 +2878,14 @@
 define @test_vlsseg7_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg7e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1465,6 +2895,12 @@
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vlsseg7e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2i32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1480,8 +2916,22 @@
 define @test_vlsseg8_nxv2i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, ma
 ; CHECK-NEXT:    vlsseg8e32.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1492,6 +2942,14 @@
 define @test_vlsseg8_mask_nxv2i32(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv2i32:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e32, m1, ta, mu
 ; CHECK-NEXT:    vlsseg8e32.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1502,6 +2960,12 @@
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vmv1r.v v14, v7
 ; CHECK-NEXT:    vlsseg8e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2i32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1517,8 +2981,22 @@
 define @test_vlsseg2_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-NEXT:    vlsseg2e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i8( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1529,10 +3007,24 @@
 define @test_vlsseg2_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; CHECK-NEXT:    vlsseg2e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vlsseg2e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,} @llvm.riscv.vlsseg2.nxv8i8( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1548,8 +3040,22 @@
 define @test_vlsseg3_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-NEXT:    vlsseg3e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1560,11 +3066,25 @@
 define @test_vlsseg3_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; CHECK-NEXT:    vlsseg3e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vlsseg3e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1580,8 +3100,22 @@
 define @test_vlsseg4_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-NEXT:    vlsseg4e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1592,12 +3126,26 @@
 define @test_vlsseg4_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; CHECK-NEXT:    vlsseg4e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
 ; CHECK-NEXT:    vmv1r.v v9, v7
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vlsseg4e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1613,8 +3161,22 @@
 define @test_vlsseg5_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-NEXT:    vlsseg5e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1625,6 +3187,14 @@
 define @test_vlsseg5_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; CHECK-NEXT:    vlsseg5e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1632,6 +3202,12 @@
 ; CHECK-NEXT:    vmv1r.v v10, v7
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vlsseg5e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv8i8( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1647,8 +3223,22 @@
 define @test_vlsseg6_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-NEXT:    vlsseg6e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1659,6 +3249,14 @@
 define @test_vlsseg6_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; CHECK-NEXT:    vlsseg6e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1667,6 +3265,12 @@
 ; CHECK-NEXT:    vmv1r.v v11, v7
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vlsseg6e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv8i8( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1682,8 +3286,22 @@
 define @test_vlsseg7_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, ma
 ; CHECK-NEXT:    vlsseg7e8.v v7, (a0), a1
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1694,6 +3312,14 @@
 define @test_vlsseg7_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
+; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT:    addi a3, sp, 16
+; CHECK-NEXT:    vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT:    vsetvli zero, a2, e8, m1, ta, mu
 ; CHECK-NEXT:    vlsseg7e8.v v7, (a0), a1
 ; CHECK-NEXT:    vmv1r.v v8, v7
@@ -1703,6 +3329,12 @@
 ; CHECK-NEXT:    vmv1r.v v12, v7
 ; CHECK-NEXT:    vmv1r.v v13, v7
 ; CHECK-NEXT:    vlsseg7e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT:    addi a0, sp, 16
+; CHECK-NEXT:    vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT:    csrr a0, vlenb
+; CHECK-NEXT:    slli a0, a0, 1
+; CHECK-NEXT:    add sp, sp, a0
+; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv8i8( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -1718,8 +3350,22 @@
 define @test_vlsseg8_nxv8i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv8i8:
 ; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    csrr a3, vlenb
+; CHECK-NEXT:    slli a3, a3, 1
+; CHECK-NEXT:    sub sp, sp, a3
0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, ma ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1730,6 +3376,14 @@ define @test_vlsseg8_mask_nxv8i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, m1, ta, mu ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1740,6 +3394,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv8i8( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1755,8 +3415,52 @@ define @test_vlsseg2_nxv4i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1767,10 +3471,54 @@ define @test_vlsseg2_mask_nxv4i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1786,8 +3534,22 @@ define @test_vlsseg2_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1798,10 +3560,24 @@ define @test_vlsseg2_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4i16: ; CHECK: # %bb.0: # 
%entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4i16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1817,8 +3593,22 @@ define @test_vlsseg3_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1829,11 +3619,25 @@ define @test_vlsseg3_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4i16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1849,8 +3653,22 @@ define @test_vlsseg4_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: 
vlsseg4e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1861,12 +3679,26 @@ define @test_vlsseg4_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4i16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1882,8 +3714,22 @@ define @test_vlsseg5_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1894,6 +3740,14 @@ define @test_vlsseg5_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1901,6 +3755,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} 
@llvm.riscv.vlsseg5.nxv4i16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1916,8 +3776,22 @@ define @test_vlsseg6_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1928,6 +3802,14 @@ define @test_vlsseg6_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1936,6 +3818,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4i16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1951,8 +3839,22 @@ define @test_vlsseg7_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1963,6 +3865,14 @@ define @test_vlsseg7_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; 
CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -1972,6 +3882,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4i16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1987,8 +3903,22 @@ define @test_vlsseg8_nxv4i16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -1999,6 +3929,14 @@ define @test_vlsseg8_mask_nxv4i16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -2009,6 +3947,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4i16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2024,8 +3968,22 @@ define @test_vlsseg2_nxv1i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; 
CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i8( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2036,10 +3994,24 @@ define @test_vlsseg2_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1i8( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2055,8 +4027,22 @@ define @test_vlsseg3_nxv1i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i8( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2067,11 +4053,25 @@ define @test_vlsseg3_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1i8( undef, undef, undef, ptr %base, i64 
%offset, i64 %vl) @@ -2087,8 +4087,22 @@ define @test_vlsseg4_nxv1i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2099,12 +4113,26 @@ define @test_vlsseg4_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1i8( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2120,8 +4148,22 @@ define @test_vlsseg5_nxv1i8(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1i8( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2132,6 +4174,14 @@ define @test_vlsseg5_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb 
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu
 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2139,6 +4189,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg5.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2154,8 +4210,22 @@
 define <vscale x 1 x i8> @test_vlsseg6_nxv1i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg6.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2166,6 +4236,14 @@
 define <vscale x 1 x i8> @test_vlsseg6_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu
 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2174,6 +4252,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg6.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2189,8 +4273,22 @@
 define <vscale x 1 x i8> @test_vlsseg7_nxv1i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg7.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2201,6 +4299,14 @@
 define <vscale x 1 x i8> @test_vlsseg7_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu
 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2210,6 +4316,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg7.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2225,8 +4337,22 @@
 define <vscale x 1 x i8> @test_vlsseg8_nxv1i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, ma
 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2237,6 +4363,14 @@
 define <vscale x 1 x i8> @test_vlsseg8_mask_nxv1i8(ptr %base, i64 %offset, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf8, ta, mu
 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2247,6 +4381,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>,<vscale x 1 x i8>} @llvm.riscv.vlsseg8.nxv1i8(<vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, <vscale x 1 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2262,8 +4402,22 @@
 define <vscale x 2 x i8> @test_vlsseg2_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg2.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2274,10 +4428,24 @@
 define <vscale x 2 x i8> @test_vlsseg2_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlsseg2e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg2.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2293,8 +4461,22 @@
 define <vscale x 2 x i8> @test_vlsseg3_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg3.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2305,11 +4487,25 @@
 define <vscale x 2 x i8> @test_vlsseg3_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlsseg3e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg3.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2325,8 +4521,22 @@
 define <vscale x 2 x i8> @test_vlsseg4_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg4.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2337,12 +4547,26 @@
 define <vscale x 2 x i8> @test_vlsseg4_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlsseg4e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg4.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2358,8 +4582,22 @@
 define <vscale x 2 x i8> @test_vlsseg5_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg5.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2370,6 +4608,14 @@
 define <vscale x 2 x i8> @test_vlsseg5_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2377,6 +4623,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg5.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2392,8 +4644,22 @@
 define <vscale x 2 x i8> @test_vlsseg6_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg6.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2404,6 +4670,14 @@
 define <vscale x 2 x i8> @test_vlsseg6_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2412,6 +4686,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg6.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2427,8 +4707,22 @@
 define <vscale x 2 x i8> @test_vlsseg7_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg7.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2439,6 +4733,14 @@
 define <vscale x 2 x i8> @test_vlsseg7_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2448,6 +4750,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg7.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2463,8 +4771,22 @@
 define <vscale x 2 x i8> @test_vlsseg8_nxv2i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, ma
 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg8.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2475,6 +4797,14 @@
 define <vscale x 2 x i8> @test_vlsseg8_mask_nxv2i8(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, mf4, ta, mu
 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2485,6 +4815,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlsseg8e8.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>,<vscale x 2 x i8>} @llvm.riscv.vlsseg8.nxv2i8(<vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, <vscale x 2 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2500,8 +4836,52 @@
 define <vscale x 8 x i32> @test_vlsseg2_nxv8i32(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
 ; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlsseg2.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2512,10 +4892,54 @@
 define <vscale x 8 x i32> @test_vlsseg2_mask_nxv8i32(ptr %base, i64 %offset, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu
 ; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1
 ; CHECK-NEXT: vmv4r.v v8, v4
 ; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlsseg2.nxv8i32(<vscale x 8 x i32> undef, <vscale x 8 x i32> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2531,8 +4955,52 @@
 define <vscale x 32 x i8> @test_vlsseg2_nxv32i8(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma
 ; CHECK-NEXT: vlsseg2e8.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 32 x i8>,<vscale x 32 x i8>} @llvm.riscv.vlsseg2.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2543,10 +5011,54 @@
 define <vscale x 32 x i8> @test_vlsseg2_mask_nxv32i8(ptr %base, i64 %offset, i64 %vl, <vscale x 32 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, mu
 ; CHECK-NEXT: vlsseg2e8.v v4, (a0), a1
 ; CHECK-NEXT: vmv4r.v v8, v4
 ; CHECK-NEXT: vlsseg2e8.v v4, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 32 x i8>,<vscale x 32 x i8>} @llvm.riscv.vlsseg2.nxv32i8(<vscale x 32 x i8> undef, <vscale x 32 x i8> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2562,8 +5074,22 @@
 define <vscale x 2 x i16> @test_vlsseg2_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg2.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2574,10 +5100,24 @@
 define <vscale x 2 x i16> @test_vlsseg2_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg2_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg2.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2593,8 +5133,22 @@
 define <vscale x 2 x i16> @test_vlsseg3_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg3_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg3.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2605,11 +5159,25 @@
 define <vscale x 2 x i16> @test_vlsseg3_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg3_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg3.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2625,8 +5193,22 @@
 define <vscale x 2 x i16> @test_vlsseg4_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg4_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg4.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2637,12 +5219,26 @@
 define <vscale x 2 x i16> @test_vlsseg4_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg4_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
 ; CHECK-NEXT: vmv1r.v v9, v7
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg4.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2658,8 +5254,22 @@
 define <vscale x 2 x i16> @test_vlsseg5_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg5_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg5.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2670,6 +5280,14 @@
 define <vscale x 2 x i16> @test_vlsseg5_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg5_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2677,6 +5295,12 @@
 ; CHECK-NEXT: vmv1r.v v10, v7
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg5.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2692,8 +5316,22 @@
 define <vscale x 2 x i16> @test_vlsseg6_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg6_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg6.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2704,6 +5342,14 @@
 define <vscale x 2 x i16> @test_vlsseg6_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg6_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2712,6 +5358,12 @@
 ; CHECK-NEXT: vmv1r.v v11, v7
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg6.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2727,8 +5379,22 @@
 define <vscale x 2 x i16> @test_vlsseg7_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg7_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg7.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2739,6 +5405,14 @@
 define <vscale x 2 x i16> @test_vlsseg7_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg7_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2748,6 +5422,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v7
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg7.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2763,8 +5443,22 @@
 define <vscale x 2 x i16> @test_vlsseg8_nxv2i16(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg8_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma
 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg8.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef , <vscale x 2 x i16> undef , <vscale x 2 x i16> undef, <vscale x 2 x i16> undef , <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2775,6 +5469,14 @@
 define <vscale x 2 x i16> @test_vlsseg8_mask_nxv2i16(ptr %base, i64 %offset, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlsseg8_mask_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu
 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
 ; CHECK-NEXT: vmv1r.v v8, v7
@@ -2785,6 +5487,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v7
 ; CHECK-NEXT: vmv1r.v v14, v7
 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>,<vscale x 2 x i16>} @llvm.riscv.vlsseg8.nxv2i16(<vscale x 2 x i16> undef, <vscale x 2 x i16> undef , <vscale x 2 x i16> undef , <vscale x 2 x i16> undef, <vscale x 2 x i16> undef , <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, <vscale x 2 x i16> undef, ptr %base, i64 %offset, i64 %vl)
@@ -2800,8 +5508,30 @@
 define <vscale x 2 x i64> @test_vlsseg2_nxv2i64(ptr %base, i64 %offset, i64 %vl) {
 ; CHECK-LABEL: test_vlsseg2_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
 ; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2812,10 +5542,32 @@ define @test_vlsseg2_mask_nxv2i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2i64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2831,8 +5583,30 @@ define @test_vlsseg3_nxv2i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2843,11 +5617,33 @@ define @test_vlsseg3_mask_nxv2i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # 
Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2i64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2863,8 +5659,30 @@ define @test_vlsseg4_nxv2i64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma ; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2875,12 +5693,34 @@ define @test_vlsseg4_mask_nxv2i64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2i64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2896,8 +5736,52 @@ define @test_vlsseg2_nxv16f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv16f16: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16f16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2908,10 +5792,54 @@ define @test_vlsseg2_mask_nxv16f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv16f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, mu ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16f16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2927,8 +5855,52 @@ define @test_vlsseg2_nxv4f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, ma ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2939,10 +5911,54 @@ define @test_vlsseg2_mask_nxv4f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: 
addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e64.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2958,8 +5974,22 @@ define @test_vlsseg2_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2970,10 +6000,24 @@ define @test_vlsseg2_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f64( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -2989,8 +6033,22 @@ define @test_vlsseg3_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # 
sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3001,11 +6059,25 @@ define @test_vlsseg3_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3021,8 +6093,22 @@ define @test_vlsseg4_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3033,12 +6119,26 @@ define @test_vlsseg4_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, 
sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3054,8 +6154,22 @@ define @test_vlsseg5_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3066,6 +6180,14 @@ define @test_vlsseg5_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3073,6 +6195,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f64( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3088,8 +6216,22 @@ define @test_vlsseg6_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3100,6 +6242,14 @@ define @test_vlsseg6_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3108,6 +6258,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f64( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3123,8 +6279,22 @@ define @test_vlsseg7_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3135,6 +6305,14 @@ define @test_vlsseg7_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3144,6 +6322,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f64( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3159,8 +6343,22 @@ define @test_vlsseg8_nxv1f64(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; 
CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, ma ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3171,6 +6369,14 @@ define @test_vlsseg8_mask_nxv1f64(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv1f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3181,6 +6387,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e64.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f64( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3196,8 +6408,22 @@ define @test_vlsseg2_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3208,10 +6434,24 @@ define @test_vlsseg2_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3227,8 +6467,22 @@ define @test_vlsseg3_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3239,11 +6493,25 @@ define @test_vlsseg3_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3259,8 +6527,22 @@ define @test_vlsseg4_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3271,12 +6553,26 @@ define @test_vlsseg4_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: 
sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3292,8 +6588,22 @@ define @test_vlsseg5_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3304,6 +6614,14 @@ define @test_vlsseg5_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3311,6 +6629,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3326,8 +6650,22 @@ define @test_vlsseg6_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: 
vlsseg6e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3338,6 +6676,14 @@ define @test_vlsseg6_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3346,6 +6692,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3361,8 +6713,22 @@ define @test_vlsseg7_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3373,6 +6739,14 @@ define @test_vlsseg7_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3382,6 +6756,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: 
addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3397,8 +6777,22 @@ define @test_vlsseg8_nxv2f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3409,6 +6803,14 @@ define @test_vlsseg8_mask_nxv2f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv2f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, mu ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -3419,6 +6821,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3434,8 +6842,22 @@ define @test_vlsseg2_nxv1f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -3446,10 +6868,24 @@ define @test_vlsseg2_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv1f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3465,8 +6901,22 @@
define @test_vlsseg3_nxv1f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3477,11 +6927,25 @@
define @test_vlsseg3_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3497,8 +6961,22 @@
define @test_vlsseg4_nxv1f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3509,12 +6987,26 @@
define @test_vlsseg4_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3530,8 +7022,22 @@
define @test_vlsseg5_nxv1f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3542,6 +7048,14 @@
define @test_vlsseg5_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3549,6 +7063,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3564,8 +7084,22 @@
define @test_vlsseg6_nxv1f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3576,6 +7110,14 @@
define @test_vlsseg6_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3584,6 +7126,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3599,8 +7147,22 @@
define @test_vlsseg7_nxv1f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3611,6 +7173,14 @@
define @test_vlsseg7_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3620,6 +7190,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3635,8 +7211,22 @@
define @test_vlsseg8_nxv1f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, ma
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3647,6 +7237,14 @@
define @test_vlsseg8_mask_nxv1f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv1f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, mf4, ta, mu
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3657,6 +7255,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3672,8 +7276,22 @@
define @test_vlsseg2_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f32( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3684,10 +7302,24 @@
define @test_vlsseg2_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv1f32( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3703,8 +7335,22 @@
define @test_vlsseg3_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3715,11 +7361,25 @@
define @test_vlsseg3_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv1f32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3735,8 +7395,22 @@
define @test_vlsseg4_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3747,12 +7421,26 @@
define @test_vlsseg4_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv1f32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3768,8 +7456,22 @@
define @test_vlsseg5_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3780,6 +7482,14 @@
define @test_vlsseg5_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3787,6 +7497,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv1f32( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3802,8 +7518,22 @@
define @test_vlsseg6_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3814,6 +7544,14 @@
define @test_vlsseg6_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3822,6 +7560,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv1f32( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3837,8 +7581,22 @@
define @test_vlsseg7_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3849,6 +7607,14 @@
define @test_vlsseg7_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3858,6 +7624,12 @@
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vlsseg7e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv1f32( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3873,8 +7645,22 @@
define @test_vlsseg8_nxv1f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg8_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, ma
; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3885,6 +7671,14 @@
define @test_vlsseg8_mask_nxv1f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg8_mask_nxv1f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, mf2, ta, mu
; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -3895,6 +7689,12 @@
; CHECK-NEXT: vmv1r.v v13, v7
; CHECK-NEXT: vmv1r.v v14, v7
; CHECK-NEXT: vlsseg8e32.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv1f32( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3910,8 +7710,30 @@
define @test_vlsseg2_nxv8f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3922,10 +7744,32 @@
define @test_vlsseg2_mask_nxv8f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu
; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlsseg2e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3941,8 +7785,30 @@
define @test_vlsseg3_nxv8f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3953,11 +7819,33 @@
define @test_vlsseg3_mask_nxv8f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu
; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlsseg3e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv8f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3973,8 +7861,30 @@
define @test_vlsseg4_nxv8f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, ma
; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -3985,12 +7895,34 @@
define @test_vlsseg4_mask_nxv8f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv8f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m2, ta, mu
; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vmv2r.v v12, v6
; CHECK-NEXT: vlsseg4e16.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv8f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4006,8 +7938,52 @@
define @test_vlsseg2_nxv8f32(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f32( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4018,10 +7994,54 @@
define @test_vlsseg2_mask_nxv8f32(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv8f32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 2
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 1
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, mu
; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1
; CHECK-NEXT: vmv4r.v v8, v4
; CHECK-NEXT: vlsseg2e32.v v4, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv8f32( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4037,8 +8057,30 @@
define @test_vlsseg2_nxv2f64(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f64( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4049,10 +8091,32 @@
define @test_vlsseg2_mask_nxv2f64(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vlsseg2e64.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f64( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4068,8 +8132,30 @@
define @test_vlsseg3_nxv2f64(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4080,11 +8166,33 @@
define @test_vlsseg3_mask_nxv2f64(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vlsseg3e64.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f64( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4100,8 +8208,30 @@
define @test_vlsseg4_nxv2f64(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, ma
; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4112,12 +8242,34 @@
define @test_vlsseg4_mask_nxv2f64(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv2f64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e64, m2, ta, mu
; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1
; CHECK-NEXT: vmv2r.v v8, v6
; CHECK-NEXT: vmv2r.v v10, v6
; CHECK-NEXT: vmv2r.v v12, v6
; CHECK-NEXT: vlsseg4e64.v v6, (a0), a1, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f64( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4133,8 +8285,22 @@
define @test_vlsseg2_nxv4f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg2_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4145,10 +8311,24 @@
define @test_vlsseg2_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg2_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f16( undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4164,8 +8344,22 @@
define @test_vlsseg3_nxv4f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg3_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4176,11 +8370,25 @@
define @test_vlsseg3_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg3_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4196,8 +8404,22 @@
define @test_vlsseg4_nxv4f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg4_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4208,12 +8430,26 @@
define @test_vlsseg4_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg4_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
; CHECK-NEXT: vmv1r.v v9, v7
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4229,8 +8465,22 @@
define @test_vlsseg5_nxv4f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg5_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4241,6 +8491,14 @@
define @test_vlsseg5_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg5_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -4248,6 +8506,12 @@
; CHECK-NEXT: vmv1r.v v10, v7
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv4f16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4263,8 +8527,22 @@
define @test_vlsseg6_nxv4f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg6_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4275,6 +8553,14 @@
define @test_vlsseg6_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg6_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1
; CHECK-NEXT: vmv1r.v v8, v7
@@ -4283,6 +8569,12 @@
; CHECK-NEXT: vmv1r.v v11, v7
; CHECK-NEXT: vmv1r.v v12, v7
; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv4f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4298,8 +8590,22 @@
define @test_vlsseg7_nxv4f16(ptr %base, i64 %offset, i64 %vl) {
; CHECK-LABEL: test_vlsseg7_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma
; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl)
@@ -4310,6 +8616,14 @@
define @test_vlsseg7_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) {
; CHECK-LABEL: test_vlsseg7_mask_nxv4f16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4319,6 +8633,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv4f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4334,8 +8654,22 @@ define @test_vlsseg8_nxv4f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4346,6 +8680,14 @@ define @test_vlsseg8_mask_nxv4f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv4f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, mu ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4356,6 +8698,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv4f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4371,8 +8719,22 @@ define @test_vlsseg2_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 
# sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4383,10 +8745,24 @@ define @test_vlsseg2_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vlsseg2e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv2f16( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4402,8 +8778,22 @@ define @test_vlsseg3_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv2f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4414,11 +8804,25 @@ define @test_vlsseg3_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vlsseg3e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail 
call {,,} @llvm.riscv.vlsseg3.nxv2f16( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4434,8 +8838,22 @@ define @test_vlsseg4_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4446,12 +8864,26 @@ define @test_vlsseg4_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 ; CHECK-NEXT: vmv1r.v v9, v7 ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vlsseg4e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv2f16( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4467,8 +8899,22 @@ define @test_vlsseg5_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg5_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4479,6 +8925,14 @@ define @test_vlsseg5_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg5_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 
0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4486,6 +8940,12 @@ ; CHECK-NEXT: vmv1r.v v10, v7 ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vlsseg5e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vlsseg5.nxv2f16( undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4501,8 +8961,22 @@ define @test_vlsseg6_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg6_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4513,6 +8987,14 @@ define @test_vlsseg6_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg6_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4521,6 +9003,12 @@ ; CHECK-NEXT: vmv1r.v v11, v7 ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vlsseg6e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vlsseg6.nxv2f16( undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4536,8 +9024,22 @@ define @test_vlsseg7_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg7_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: 
vlsseg7e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4548,6 +9050,14 @@ define @test_vlsseg7_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg7_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4557,6 +9067,12 @@ ; CHECK-NEXT: vmv1r.v v12, v7 ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vlsseg7e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,} @llvm.riscv.vlsseg7.nxv2f16( undef, undef, undef, undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4572,8 +9088,22 @@ define @test_vlsseg8_nxv2f16(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg8_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4584,6 +9114,14 @@ define @test_vlsseg8_mask_nxv2f16(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg8_mask_nxv2f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, mu ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1 ; CHECK-NEXT: vmv1r.v v8, v7 @@ -4594,6 +9132,12 @@ ; CHECK-NEXT: vmv1r.v v13, v7 ; CHECK-NEXT: vmv1r.v v14, v7 ; CHECK-NEXT: vlsseg8e16.v v7, (a0), a1, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: 
add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,,,} @llvm.riscv.vlsseg8.nxv2f16( undef, undef , undef , undef, undef , undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4609,8 +9153,30 @@ define @test_vlsseg2_nxv4f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg2_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4621,10 +9187,32 @@ define @test_vlsseg2_mask_nxv4f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vlsseg2e32.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv4f32( undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4640,8 +9228,30 @@ define @test_vlsseg3_nxv4f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg3_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, 
e32, m2, ta, ma ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4f32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4652,11 +9262,33 @@ define @test_vlsseg3_mask_nxv4f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg3_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vlsseg3e32.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vlsseg3.nxv4f32( undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4672,8 +9304,30 @@ define @test_vlsseg4_nxv4f32(ptr %base, i64 %offset, i64 %vl) { ; CHECK-LABEL: test_vlsseg4_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4f32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) @@ -4684,12 +9338,34 @@ define @test_vlsseg4_mask_nxv4f32(ptr %base, i64 %offset, i64 %vl, %mask) { ; CHECK-LABEL: test_vlsseg4_mask_nxv4f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a2, e32, m2, ta, mu ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1 ; CHECK-NEXT: vmv2r.v v8, v6 ; CHECK-NEXT: vmv2r.v v10, v6 ; CHECK-NEXT: vmv2r.v v12, v6 ; CHECK-NEXT: vlsseg4e32.v v6, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vlsseg4.nxv4f32( undef, undef, undef, undef, ptr %base, i64 %offset, i64 %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv32.ll @@ -21,9 +21,53 @@ define @test_vluxseg2_mask_nxv16i16_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -50,9 +94,53 @@ define @test_vluxseg2_mask_nxv16i16_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) { ; 
CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -79,9 +167,53 @@ define @test_vluxseg2_mask_nxv16i16_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -108,9 +240,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -137,9 +283,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -166,9 +326,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -195,11 +369,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -226,11 +414,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -257,11 +459,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 
%vl, i32 1) @@ -813,9 +1029,31 @@ define @test_vluxseg2_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i8_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i8.nxv16i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -842,9 +1080,31 @@ define @test_vluxseg2_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i8.nxv16i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -871,9 +1131,31 @@ define @test_vluxseg2_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i8_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v 
v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i8.nxv16i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -900,10 +1182,32 @@ define @test_vluxseg3_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv16i8_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv16i8.nxv16i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -930,11 +1234,33 @@ define @test_vluxseg3_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv16i8_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv16i8.nxv16i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -961,10 +1287,32 @@ define @test_vluxseg3_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv16i8_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi 
sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv16i8.nxv16i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -991,12 +1339,34 @@ define @test_vluxseg4_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv16i8_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv16i8.nxv16i16( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1056,11 +1426,33 @@ define @test_vluxseg4_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv16i8_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v6, (a0), 
v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv16i8.nxv16i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1087,9 +1479,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1116,9 +1522,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1145,9 +1565,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1174,11 +1608,25 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1205,11 +1653,25 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1236,11 +1698,25 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1792,9 +2268,23 @@ define 
@test_vluxseg2_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1821,9 +2311,23 @@ define @test_vluxseg2_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1850,9 +2354,23 @@ define @test_vluxseg2_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1879,11 +2397,25 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 
0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1910,11 +2442,25 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -1941,10 +2487,24 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2037,12 +2597,26 @@ define @test_vluxseg4_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv4i16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; 
CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2495,9 +3069,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2524,9 +3112,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2553,9 +3155,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2582,11 +3198,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2613,11 +3243,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -2644,11 +3288,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3200,9 +3858,31 @@ define 
@test_vluxseg2_mask_nxv8i16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3229,9 +3909,31 @@ define @test_vluxseg2_mask_nxv8i16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3258,9 +3960,31 @@ define @test_vluxseg2_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3287,11 +4011,33 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3318,11 +4064,33 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3349,10 +4117,32 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3445,12 +4235,34 @@ define @test_vluxseg4_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv8i16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i16.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3477,9 +4289,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; 
CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3506,9 +4332,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3535,9 +4375,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3564,10 +4418,24 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3594,11 +4462,25 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
.cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3625,10 +4507,24 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3655,12 +4551,26 @@ define @test_vluxseg4_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv8i8_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg4ei16.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i8.nxv8i16( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3720,11 +4630,25 @@ define @test_vluxseg4_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3819,12 +4743,26 @@ define @test_vluxseg5_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg5_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg5ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vluxseg5.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -3921,6 +4859,14 @@ define @test_vluxseg6_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg6_mask_nxv8i8_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3929,6 +4875,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu ; CHECK-NEXT: vluxseg6ei32.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vluxseg6.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4174,9 +5126,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: 
csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4203,9 +5199,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4232,9 +5272,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4261,9 +5345,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i8_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4290,9 +5388,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: 
test_vluxseg2_mask_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4319,9 +5431,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i8_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4348,11 +5474,25 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4379,11 +5519,25 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4410,10 +5564,24 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4506,12 +5674,26 @@ define @test_vluxseg4_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv4i8_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i8.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4964,9 +6146,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, 
mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -4993,9 +6189,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5022,9 +6232,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5051,11 +6275,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} 
@llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5082,11 +6320,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5113,11 +6365,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -5669,9 +6935,53 @@ define @test_vluxseg2_mask_nxv32i8_nxv32i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv32i8_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; 
CHECK-NEXT: vluxseg2ei16.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv32i8.nxv32i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5698,9 +7008,53 @@
 define @test_vluxseg2_mask_nxv32i8_nxv32i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv32i8_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv32i8.nxv32i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5727,9 +7081,23 @@
 define @test_vluxseg2_mask_nxv2i8_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5756,9 +7124,23 @@
 define @test_vluxseg2_mask_nxv2i8_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5785,9 +7167,23 @@
 define @test_vluxseg2_mask_nxv2i8_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5814,11 +7210,25 @@
 define @test_vluxseg3_mask_nxv2i8_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5845,11 +7255,25 @@
 define @test_vluxseg3_mask_nxv2i8_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -5876,11 +7300,25 @@
 define @test_vluxseg3_mask_nxv2i8_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6432,9 +7870,23 @@
 define @test_vluxseg2_mask_nxv2i16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6461,9 +7913,23 @@
 define @test_vluxseg2_mask_nxv2i16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6490,9 +7956,23 @@
 define @test_vluxseg2_mask_nxv2i16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6519,11 +7999,25 @@
 define @test_vluxseg3_mask_nxv2i16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1)
@@ -6550,11 +8044,25 @@
 define @test_vluxseg3_mask_nxv2i16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, 
a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -6581,11 +8089,25 @@ define @test_vluxseg3_mask_nxv2i16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7137,9 +8659,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7166,9 +8710,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: 
slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7195,9 +8761,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7224,11 +8812,33 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7255,11 +8865,33 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7286,11 +8918,33 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7416,9 +9070,53 @@ define @test_vluxseg2_mask_nxv16f16_nxv16i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 
0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16f16.nxv16i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7445,9 +9143,53 @@ define @test_vluxseg2_mask_nxv16f16_nxv16i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: 
vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16f16.nxv16i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7474,9 +9216,53 @@ define @test_vluxseg2_mask_nxv16f16_nxv16i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16f16.nxv16i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7503,9 +9289,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size 
Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7532,9 +9362,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7561,9 +9435,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 
0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7590,9 +9508,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7619,9 +9551,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: 
vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7648,9 +9594,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7677,11 +9637,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7708,11 +9682,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i32( %val, %val, 
%val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -7739,11 +9727,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -8295,9 +10297,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -8324,9 +10340,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -8353,9 +10383,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr 
a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -8382,11 +10426,25 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -8413,11 +10471,25 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -8444,11 +10516,25 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: 
vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9000,9 +11086,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9029,9 +11129,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9058,9 +11172,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9087,11 +11215,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9118,11 +11260,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9149,11 +11305,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 
%vl, i32 1) @@ -9705,9 +11875,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9734,9 +11918,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9763,9 +11961,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9792,11 +12004,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9823,11 +12049,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -9854,11 +12094,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10410,9 +12664,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; 
CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10439,9 +12715,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10468,9 +12766,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10497,11 +12817,33 @@ define 
@test_vluxseg3_mask_nxv8f16_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10528,11 +12870,33 @@ define @test_vluxseg3_mask_nxv8f16_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10559,10 +12923,32 @@ define @test_vluxseg3_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; 
CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10655,12 +13041,34 @@ define @test_vluxseg4_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv8f16_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8f16.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10687,9 +13095,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) 
# Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10716,9 +13168,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10745,9 +13241,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10774,9 +13314,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10803,9 +13365,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; 
CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10832,9 +13416,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10861,11 +13467,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10892,11 +13520,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i8: ; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -10923,11 +13573,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11053,9 +13725,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, 
sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11082,9 +13768,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11111,9 +13811,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11140,11 +13854,25 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11171,11 +13899,25 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: 
test_vluxseg3_mask_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11202,10 +13944,24 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11298,12 +14054,26 @@ define @test_vluxseg4_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4f16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11756,9 +14526,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: 
slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11785,9 +14569,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11814,9 +14612,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11843,11 +14655,25 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli 
zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11874,11 +14700,25 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -11905,11 +14745,25 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -12461,9 +15315,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i16( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -12490,9 +15366,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i8( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -12519,9 +15417,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i32( %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -12548,11 +15468,33 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; 
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -12579,11 +15521,33 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) @@ -12610,11 +15574,33 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i32 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i32 %vl, i32 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vluxseg-rv64.ll @@ -21,9 +21,53 @@ define @test_vluxseg2_mask_nxv16i16_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -50,9 +94,53 @@ define @test_vluxseg2_mask_nxv16i16_nxv16i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v 
v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -79,9 +167,53 @@ define @test_vluxseg2_mask_nxv16i16_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16i16_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i16.nxv16i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -108,9 +240,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; 
CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -137,9 +291,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -166,9 +342,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: 
vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -195,9 +393,31 @@ define @test_vluxseg2_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i32.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -224,11 +444,33 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -255,11 +497,33 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -286,10 +550,32 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -316,11 +602,33 @@ define @test_vluxseg3_mask_nxv4i32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i32_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -413,12 +721,34 @@ define @test_vluxseg4_mask_nxv4i32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv4i32_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i32.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -478,9 +808,31 @@ define @test_vluxseg2_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv16i8_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i8.nxv16i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -507,9 +859,31 @@ define @test_vluxseg2_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i8.nxv16i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -536,9 +910,31 @@ define @test_vluxseg2_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv16i8_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16i8.nxv16i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -565,10 +961,32 @@ define @test_vluxseg3_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv16i8_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv16i8.nxv16i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -595,11 +1013,33 @@ define @test_vluxseg3_mask_nxv16i8_nxv16i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv16i8_nxv16i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv16i8.nxv16i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -626,10 +1066,32 @@ define @test_vluxseg3_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv16i8_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv16i8.nxv16i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -656,12 +1118,34 @@ define @test_vluxseg4_mask_nxv16i8_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv16i8_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei16.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv16i8.nxv16i16( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -721,11 +1205,33 @@ define @test_vluxseg4_mask_nxv16i8_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv16i8_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv16i8.nxv16i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -752,9 +1258,23 @@ define @test_vluxseg2_mask_nxv1i64_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i64.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -781,9 +1301,23 @@ define @test_vluxseg2_mask_nxv1i64_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i64.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -810,9 +1344,23 @@ define @test_vluxseg2_mask_nxv1i64_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i64.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -839,9 +1387,23 @@ define @test_vluxseg2_mask_nxv1i64_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i64.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -868,11 +1430,25 @@ define @test_vluxseg3_mask_nxv1i64_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i64.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -899,11 +1475,25 @@ define @test_vluxseg3_mask_nxv1i64_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i64.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -930,11 +1520,25 @@ define @test_vluxseg3_mask_nxv1i64_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i64.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -961,11 +1565,25 @@ define @test_vluxseg3_mask_nxv1i64_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i64.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1692,9 +2310,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1721,9 +2353,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1750,9 +2396,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1779,9 +2439,23 @@ define @test_vluxseg2_mask_nxv1i32_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i32_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i32.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1808,11 +2482,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1839,11 +2527,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1870,11 +2572,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -1901,11 +2617,25 @@ define @test_vluxseg3_mask_nxv1i32_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i32_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i32.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2632,9 +3362,31 @@ define @test_vluxseg2_mask_nxv8i16_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2661,9 +3413,31 @@ define @test_vluxseg2_mask_nxv8i16_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2690,9 +3464,31 @@ define @test_vluxseg2_mask_nxv8i16_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2719,9 +3515,31 @@ define @test_vluxseg2_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i16.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2748,11 +3566,33 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2779,11 +3619,33 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2810,10 +3672,32 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2840,10 +3724,32 @@ define @test_vluxseg3_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i16.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2936,11 +3842,33 @@ define @test_vluxseg4_mask_nxv8i16_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8i16_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i16.nxv8i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2967,12 +3895,34 @@ define @test_vluxseg4_mask_nxv8i16_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8i16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i16.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -2999,9 +3949,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i8_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3028,9 +3992,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3057,9 +4035,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i8_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3086,9 +4078,23 @@ define @test_vluxseg2_mask_nxv4i8_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i8_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i8.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3115,10 +4121,24 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3145,11 +4165,25 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3176,10 +4210,24 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3206,11 +4254,25 @@ define @test_vluxseg3_mask_nxv4i8_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i8_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i8.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3237,12 +4299,26 @@ define @test_vluxseg4_mask_nxv4i8_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv4i8_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i8.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3302,11 +4378,25 @@ define @test_vluxseg4_mask_nxv4i8_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv4i8_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i8.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3434,12 +4524,26 @@ define @test_vluxseg5_mask_nxv4i8_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg5_mask_nxv4i8_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vmv1r.v v11, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu
 ; CHECK-NEXT: vluxseg5ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,,} @llvm.riscv.vluxseg5.mask.nxv4i8.nxv4i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -3570,6 +4674,14 @@ define @test_vluxseg6_mask_nxv4i8_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg6_mask_nxv4i8_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -3578,6 +4690,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, mu ; CHECK-NEXT: vluxseg6ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vluxseg6.mask.nxv4i8.nxv4i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -3931,9 +5049,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -3960,9 +5092,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -3989,9 +5135,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: 
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4018,9 +5178,23 @@ define @test_vluxseg2_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i16_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i16.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4047,11 +5221,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4078,11 +5266,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4109,11 +5311,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4140,11 +5356,25 @@ define @test_vluxseg3_mask_nxv1i16_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1i16_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4871,9 +6101,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4900,9 +6144,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4929,9 +6187,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4958,9 +6230,23 @@ define @test_vluxseg2_mask_nxv2i32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2i32_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i32.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -4987,11 +6273,25 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5018,11 +6318,25 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5049,11 +6363,25 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5080,10 +6408,24 @@ define @test_vluxseg3_mask_nxv2i32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2i32_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i32.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5209,12 +6551,26 @@ define @test_vluxseg4_mask_nxv2i32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv2i32_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv2i32.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5809,9 +7165,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5838,9 +7208,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5867,9 +7251,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5896,9 +7294,23 @@ define @test_vluxseg2_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8i8_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i8.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5925,10 +7337,24 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5955,11 +7381,25 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -5986,10 +7426,24 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6016,10 +7470,24 @@ define @test_vluxseg3_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8i8_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8i8.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6046,12 +7514,26 @@ define @test_vluxseg4_mask_nxv8i8_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8i8_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei16.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i8.nxv8i16( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6111,11 +7593,25 @@ define @test_vluxseg4_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6142,11 +7638,25 @@ define @test_vluxseg4_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8i8_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6241,12 +7751,26 @@ define @test_vluxseg5_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg5_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vmv1r.v v11, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg5ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,} @llvm.riscv.vluxseg5.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6273,12 +7797,26 @@ define @test_vluxseg5_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg5_mask_nxv8i8_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vmv1r.v v11, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg5ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,} @llvm.riscv.vluxseg5.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6375,6 +7913,14 @@ define @test_vluxseg6_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg6_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -6382,6 +7928,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg6ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,,} @llvm.riscv.vluxseg6.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6408,6 +7960,14 @@ define @test_vluxseg6_mask_nxv8i8_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg6_mask_nxv8i8_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -6416,6 +7976,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg6ei32.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,,} @llvm.riscv.vluxseg6.mask.nxv8i8.nxv8i32( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6514,6 +8080,14 @@ define @test_vluxseg7_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg7_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -6522,6 +8096,12 @@
 ; CHECK-NEXT: vmv1r.v v13, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg7ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,,,} @llvm.riscv.vluxseg7.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6658,6 +8238,14 @@ define @test_vluxseg8_mask_nxv8i8_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg8_mask_nxv8i8_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -6667,6 +8255,12 @@
 ; CHECK-NEXT: vmv1r.v v14, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, mu
 ; CHECK-NEXT: vluxseg8ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,,,,} @llvm.riscv.vluxseg8.mask.nxv8i8.nxv8i64( %val, %val, %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6730,9 +8324,53 @@ define @test_vluxseg2_mask_nxv4i64_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i64.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6759,9 +8397,53 @@ define @test_vluxseg2_mask_nxv4i64_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i64.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6788,9 +8470,53 @@ define @test_vluxseg2_mask_nxv4i64_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i64.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6817,9 +8543,53 @@ define @test_vluxseg2_mask_nxv4i64_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i64_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i64.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6846,9 +8616,23 @@ define @test_vluxseg2_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6875,9 +8659,23 @@ define @test_vluxseg2_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6904,9 +8702,23 @@ define @test_vluxseg2_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6933,9 +8745,23 @@ define @test_vluxseg2_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4i16.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6962,10 +8788,24 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -6992,11 +8832,25 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7023,10 +8877,24 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7053,11 +8921,25 @@ define @test_vluxseg3_mask_nxv4i16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv4i16_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4i16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7084,12 +8966,26 @@ define @test_vluxseg4_mask_nxv4i16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv4i16_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7149,11 +9045,25 @@ define @test_vluxseg4_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv4i16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4i16.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7281,12 +9191,26 @@ define @test_vluxseg5_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg5_mask_nxv4i16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vmv1r.v v11, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg5ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,} @llvm.riscv.vluxseg5.mask.nxv4i16.nxv4i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7417,6 +9341,14 @@ define @test_vluxseg6_mask_nxv4i16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg6_mask_nxv4i16_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv1r.v v10, v8
@@ -7425,6 +9357,12 @@
 ; CHECK-NEXT: vmv1r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu
 ; CHECK-NEXT: vluxseg6ei64.v v7, (a0), v16, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,,,,,} @llvm.riscv.vluxseg6.mask.nxv4i16.nxv4i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7778,9 +9716,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7807,9 +9759,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7836,9 +9802,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
  %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -7865,9 +9845,23 @@ define @test_vluxseg2_mask_nxv1i8_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1i8_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1i8.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7894,11 +9888,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7925,11 +9933,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7956,11 +9978,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -7987,11 +10023,25 @@ define @test_vluxseg3_mask_nxv1i8_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv1i8_nxv1i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1i8.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8718,9 +10768,23 @@ define @test_vluxseg2_mask_nxv2i8_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8747,9 +10811,23 @@ define @test_vluxseg2_mask_nxv2i8_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8776,9 +10854,23 @@ define @test_vluxseg2_mask_nxv2i8_nxv2i16( %val, ptr %base, %index, i64 %vl, 
%mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8805,9 +10897,23 @@ define @test_vluxseg2_mask_nxv2i8_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i8_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i8.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8834,11 +10940,25 @@ define @test_vluxseg3_mask_nxv2i8_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8865,11 +10985,25 @@ define @test_vluxseg3_mask_nxv2i8_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8896,11 +11030,25 @@ define @test_vluxseg3_mask_nxv2i8_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -8927,10 +11075,24 @@ define @test_vluxseg3_mask_nxv2i8_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i8_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i8.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9056,12 +11218,26 @@ define @test_vluxseg4_mask_nxv2i8_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv2i8_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v 
v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, mu ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv2i8.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9656,9 +11832,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9685,9 +11905,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9714,9 +11978,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9743,9 +12051,53 @@ define @test_vluxseg2_mask_nxv8i32_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv8i32_nxv8i32: ; CHECK: # %bb.0: # %entry 
+; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8i32.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9772,9 +12124,53 @@ define @test_vluxseg2_mask_nxv32i8_nxv32i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv32i8_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv32i8.nxv32i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9801,9 +12197,53 @@ define @test_vluxseg2_mask_nxv32i8_nxv32i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv32i8_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv32i8.nxv32i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9830,9 +12270,23 @@ define @test_vluxseg2_mask_nxv2i16_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, 
sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9859,9 +12313,23 @@ define @test_vluxseg2_mask_nxv2i16_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9888,9 +12356,23 @@ define @test_vluxseg2_mask_nxv2i16_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9917,9 +12399,23 @@ define @test_vluxseg2_mask_nxv2i16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i16_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i16.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9946,11 +12442,25 @@ define @test_vluxseg3_mask_nxv2i16_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi 
sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -9977,11 +12487,25 @@ define @test_vluxseg3_mask_nxv2i16_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10008,11 +12532,25 @@ define @test_vluxseg3_mask_nxv2i16_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10039,10 +12577,24 @@ define @test_vluxseg3_mask_nxv2i16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i16_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 
0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i16.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10168,12 +12720,26 @@ define @test_vluxseg4_mask_nxv2i16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv2i16_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv2i16.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10768,9 +13334,31 @@ define @test_vluxseg2_mask_nxv2i64_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i64.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10797,9 +13385,31 @@ define @test_vluxseg2_mask_nxv2i64_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: 
sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i64.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10826,9 +13436,31 @@ define @test_vluxseg2_mask_nxv2i64_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i64.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10855,9 +13487,31 @@ define @test_vluxseg2_mask_nxv2i64_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add 
sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2i64.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10884,11 +13538,33 @@ define @test_vluxseg3_mask_nxv2i64_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i64.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10915,11 +13591,33 @@ define @test_vluxseg3_mask_nxv2i64_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i64.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10946,11 +13644,33 @@ define @test_vluxseg3_mask_nxv2i64_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i64.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -10977,11 +13697,33 @@ define @test_vluxseg3_mask_nxv2i64_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2i64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2i64.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -11140,9 +13882,53 @@ define @test_vluxseg2_mask_nxv16f16_nxv16i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: 
vluxseg2ei16.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16f16.nxv16i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -11169,9 +13955,53 @@ define @test_vluxseg2_mask_nxv16f16_nxv16i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_nxv16i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16f16.nxv16i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -11198,9 +14028,53 @@ define @test_vluxseg2_mask_nxv16f16_nxv16i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv16f16_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 
+ 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv16f16.nxv16i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11227,9 +14101,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11256,9 +14174,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11285,9 +14247,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11314,9 +14320,53 @@ define @test_vluxseg2_mask_nxv4f64_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv4f64_nxv4i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f64.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11343,9 +14393,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11372,9 +14436,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11401,9 +14479,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11430,9 +14522,23 @@ define @test_vluxseg2_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f64.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11459,11 +14565,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11490,11 +14610,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11521,11 +14655,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -11552,11 +14700,25 @@ define @test_vluxseg3_mask_nxv1f64_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f64_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f64.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12283,9 +15445,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12312,9 +15488,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12341,9 +15531,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12370,9 +15574,23 @@ define @test_vluxseg2_mask_nxv2f32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f32_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f32.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12399,11 +15617,25 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12430,11 +15662,25 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12461,11 +15707,25 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12492,10 +15752,24 @@ define @test_vluxseg3_mask_nxv2f32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2f32_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f32.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -12621,12 +15895,26 @@ define @test_vluxseg4_mask_nxv2f32_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv2f32_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv1r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv2f32.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13221,9 +16509,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13250,9 +16552,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13279,9 +16595,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13308,9 +16638,23 @@ define @test_vluxseg2_mask_nxv1f16_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f16_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f16.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13337,11 +16681,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13368,11 +16726,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13399,11 +16771,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -13430,11 +16816,25 @@ define @test_vluxseg3_mask_nxv1f16_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f16_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f16.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14161,9 +17561,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14190,9 +17604,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14219,9 +17647,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14248,9 +17690,23 @@ define @test_vluxseg2_mask_nxv1f32_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv1f32_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv1f32.nxv1i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14277,11 +17733,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14308,11 +17778,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14339,11 +17823,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -14370,11 +17868,25 @@ define @test_vluxseg3_mask_nxv1f32_nxv1i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv1f32_nxv1i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v7, v8
 ; CHECK-NEXT: vmv1r.v v10, v9
 ; CHECK-NEXT: vmv1r.v v9, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv1f32.nxv1i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15101,9 +18613,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15130,9 +18664,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15159,9 +18715,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15188,9 +18766,31 @@ define @test_vluxseg2_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f16.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15217,11 +18817,33 @@ define @test_vluxseg3_mask_nxv8f16_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15248,11 +18870,33 @@ define @test_vluxseg3_mask_nxv8f16_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv1r.v v12, v10
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15279,10 +18923,32 @@ define @test_vluxseg3_mask_nxv8f16_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15309,10 +18975,32 @@ define @test_vluxseg3_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv8f16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv8f16.nxv8i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15405,11 +19093,33 @@ define @test_vluxseg4_mask_nxv8f16_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8f16_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei64.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8f16.nxv8i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15436,12 +19146,34 @@ define @test_vluxseg4_mask_nxv8f16_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg4_mask_nxv8f16_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vmv2r.v v10, v8
 ; CHECK-NEXT: vmv4r.v v16, v12
 ; CHECK-NEXT: vmv2r.v v12, v8
 ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, mu
 ; CHECK-NEXT: vluxseg4ei32.v v6, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv8f16.nxv8i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15468,9 +19200,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15497,9 +19273,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15526,9 +19346,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v4, (a0), v16, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15555,9 +19419,53 @@ define @test_vluxseg2_mask_nxv8f32_nxv8i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv8f32_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv4r.v v4, v8
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v4, (a0), v12, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv8f32.nxv8i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15584,9 +19492,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15613,9 +19543,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15642,9 +19594,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15671,9 +19645,31 @@ define @test_vluxseg2_mask_nxv2f64_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg2_mask_nxv2f64_nxv2i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv2r.v v6, v8
 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu
 ; CHECK-NEXT: vluxseg2ei64.v v6, (a0), v10, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f64.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1)
@@ -15700,11 +19696,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) {
 ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT:
sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15731,11 +19749,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15762,11 +19802,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15793,11 +19855,33 @@ define @test_vluxseg3_mask_nxv2f64_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f64_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f64.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15956,9 +20040,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -15985,9 +20083,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei8.v 
v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16014,9 +20126,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16043,9 +20169,23 @@ define @test_vluxseg2_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f16.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16072,10 +20212,24 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i32( %val, %val, %val, ptr %base, %index, 
%mask, i64 %vl, i64 1) @@ -16102,11 +20256,25 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f16_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16133,10 +20301,24 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16163,11 +20345,25 @@ define @test_vluxseg3_mask_nxv4f16_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f16_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f16.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16194,12 +20390,26 @@ define @test_vluxseg4_mask_nxv4f16_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv4f16_nxv4i32: ; CHECK: # %bb.0: # %entry +; 
CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg4ei32.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4f16.nxv4i32( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16259,11 +20469,25 @@ define @test_vluxseg4_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4f16.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16391,12 +20615,26 @@ define @test_vluxseg5_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg5_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg5ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,} @llvm.riscv.vluxseg5.mask.nxv4f16.nxv4i64( %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16527,6 +20765,14 @@ define @test_vluxseg6_mask_nxv4f16_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg6_mask_nxv4f16_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, 
vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 @@ -16535,6 +20781,12 @@ ; CHECK-NEXT: vmv1r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, mu ; CHECK-NEXT: vluxseg6ei64.v v7, (a0), v16, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,,,} @llvm.riscv.vluxseg6.mask.nxv4f16.nxv4i64( %val, %val, %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16888,9 +21140,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16917,9 +21183,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16946,9 +21226,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; 
CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v7, (a0), v9, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -16975,9 +21269,23 @@ define @test_vluxseg2_mask_nxv2f16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv2f16_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv2f16.nxv2i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17004,11 +21312,25 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17035,11 +21357,25 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17066,11 +21402,25 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v10, v9 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei16.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17097,10 +21447,24 @@ define @test_vluxseg3_mask_nxv2f16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv2f16_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v7, (a0), v10, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv2f16.nxv2i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17226,12 +21590,26 @@ define @test_vluxseg4_mask_nxv2f16_nxv2i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv2f16_nxv2i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v7, v8 ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, mu ; CHECK-NEXT: vluxseg4ei64.v v7, (a0), v12, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = 
tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv2f16.nxv2i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17826,9 +22204,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei32.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i32( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17855,9 +22255,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei8.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i8( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17884,9 +22306,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size 
Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei64.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i64( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17913,9 +22357,31 @@ define @test_vluxseg2_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg2_mask_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg2ei16.v v6, (a0), v10, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vluxseg2.mask.nxv4f32.nxv4i16( %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17942,11 +22408,33 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i32( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_nxv4i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei32.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i32( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -17973,11 +22461,33 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i8( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: 
test_vluxseg3_mask_nxv4f32_nxv4i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei8.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i8( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -18004,10 +22514,32 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg3ei64.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i64( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -18034,11 +22566,33 @@ define @test_vluxseg3_mask_nxv4f32_nxv4i16( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg3_mask_nxv4f32_nxv4i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv1r.v v12, v10 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, 
ta, mu ; CHECK-NEXT: vluxseg3ei16.v v6, (a0), v12, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,} @llvm.riscv.vluxseg3.mask.nxv4f32.nxv4i16( %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) @@ -18131,12 +22685,34 @@ define @test_vluxseg4_mask_nxv4f32_nxv4i64( %val, ptr %base, %index, i64 %vl, %mask) { ; CHECK-LABEL: test_vluxseg4_mask_nxv4f32_nxv4i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv2r.v v6, v8 ; CHECK-NEXT: vmv2r.v v10, v8 ; CHECK-NEXT: vmv4r.v v16, v12 ; CHECK-NEXT: vmv2r.v v12, v8 ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, mu ; CHECK-NEXT: vluxseg4ei64.v v6, (a0), v16, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,,,} @llvm.riscv.vluxseg4.mask.nxv4f32.nxv4i64( %val, %val, %val, %val, ptr %base, %index, %mask, i64 %vl, i64 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmacc-vp.ll @@ -618,10 +618,100 @@ define @vmacc_vv_nxv64i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv64i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -634,10 +724,100 @@ define @vmacc_vv_nxv64i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv64i8_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; 
CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma ; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -684,10 +864,100 @@ define @vmacc_vv_nxv64i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv64i8_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi 
a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %splat = insertelement <vscale x 64 x i1> poison, i1 -1, i32 0
 %allones = shufflevector <vscale x 64 x i1> %splat, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
@@ -1227,10 +1497,100 @@
 define <vscale x 32 x i16> @vmacc_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmacc_vv_nxv32i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu
 ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -1243,10 +1603,100 @@
 define <vscale x 32 x i16> @vmacc_vv_nxv32i16_unmasked(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmacc_vv_nxv32i16_unmasked:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma
 ; CHECK-NEXT: vmacc.vv v24, v8, v16
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %splat = insertelement <vscale x 32 x i1> poison, i1 -1, i32 0
 %allones = shufflevector <vscale x 32 x i1> %splat, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
@@ -1293,10 +1743,100 @@
 define <vscale x 32 x i16> @vmacc_vv_nxv32i16_ta(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmacc_vv_nxv32i16_ta:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2,
vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1735,10 +2275,100 @@ define @vmacc_vv_nxv16i32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, 
sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1751,10 +2381,100 @@ define @vmacc_vv_nxv16i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: 
vmacc_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, 
i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1801,10 +2521,100 @@ define @vmacc_vv_nxv16i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv16i32_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2277,10 +3087,100 @@ define @vmacc_vv_nxv8i64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2293,10 +3193,100 @@ define @vmacc_vv_nxv8i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv8i64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vmacc.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2311,6 +3301,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2319,6 +3353,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu ; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; 
RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2343,6 +3420,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2351,6 +3472,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma ; RV32-NEXT: vmacc.vv v16, v8, v24 ; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2373,10 +3537,100 @@ define @vmacc_vv_nxv8i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmacc_vv_nxv8i64_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vmacc.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2391,6 +3645,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2399,6 +3697,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vmacc.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv.v.v v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; 
RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadc.carry.in.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vmadc.carry.in.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
@@ -150,9 +150,21 @@
 define <vscale x 64 x i1> @intrinsic_vmadc.carry.in_vvm_nxv64i1_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv64i1_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT: vmadc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i1> @llvm.riscv.vmadc.carry.in.nxv64i8.nxv64i8(
@@ -288,9 +300,21 @@
 define <vscale x 32 x i1> @intrinsic_vmadc.carry.in_vvm_nxv32i1_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv32i1_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT: vmadc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 32 x i1> @llvm.riscv.vmadc.carry.in.nxv32i16.nxv32i16(
@@ -403,9 +427,21 @@
 define <vscale x 16 x i1> @intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv16i1_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmadc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i1> @llvm.riscv.vmadc.carry.in.nxv16i32.nxv16i32(
@@ -495,9 +531,21 @@
 define <vscale x 8 x i1> @intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmadc.carry.in_vvm_nxv8i1_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmadc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmadc.carry.in.nxv8i64.nxv8i64(
@@ -1041,6 +1089,49 @@
 ; RV32-LABEL: intrinsic_vmadc.carry.in_vxm_nxv8i1_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -1048,6 +1139,49 @@
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmadc.vvm v16, v8, v24, v0
 ; RV32-NEXT: vmv1r.v v0, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmadd-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -target-abi=ilp32 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -target-abi=lp64 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 ; This tests a mix of vmacc and vmadd by using different operand orders to
@@ -154,9 +154,99 @@
 define <vscale x 64 x i8> @vmadd_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i8> %vc) {
 ; CHECK-LABEL: vmadd_vv_nxv64i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vmacc.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %x = mul <vscale x 64 x i8> %vc, %vb
 %y = add <vscale x 64 x i8> %x, %va
@@ -299,9 +389,99 @@
 define <vscale x 32 x i16> @vmadd_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i16> %vc) {
 ; CHECK-LABEL: vmadd_vv_nxv32i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vmacc.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %x = mul <vscale x 32 x i16> %vc, %vb
 %y = add <vscale x 32 x i16> %x, %va
@@ -420,9 +600,99 @@
 define <vscale x 16 x i32> @vmadd_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i32> %vc) {
 ; CHECK-LABEL: vmadd_vv_nxv16i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vmadd.vv v8, v24, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %x = mul <vscale x 16 x i32> %vc, %va
 %y = add <vscale x 16 x i32> %x, %vb
@@ -556,9 +826,99 @@
 define <vscale x 8 x i64> @vmadd_vv_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb, <vscale x 8 x i64> %vc) {
 ; CHECK-LABEL: vmadd_vv_nxv8i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr
a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmacc.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %x = mul %vb, %vc %y = add %x, %va @@ -570,12 +930,99 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; 
RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vmacc.vv v8, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -440,6 +440,14 @@ define @vmax_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv128i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -458,6 +466,12 @@ ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1048,6 +1062,14 @@ define @vmax_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 
0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a3, a2, 2 @@ -1067,6 +1089,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1110,6 +1138,14 @@ define @vmax_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { ; CHECK-LABEL: vmax_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -1129,6 +1165,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmax.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1157,6 +1199,14 @@ ; ; RV64-LABEL: vmax_vx_nxv32i32_evl_nx16: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a2, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -1167,6 +1217,12 @@ ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmax.vx v16, v16, a0, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax.ll b/llvm/test/CodeGen/RISCV/rvv/vmax.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmax.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vmax.nxv1i8.nxv1i8( , @@ 
-318,9 +318,97 @@ define @intrinsic_vmax_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmax.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ 
define @intrinsic_vmax_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmax.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define 
@intrinsic_vmax_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmax.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define 
@intrinsic_vmax_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmax_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vmax.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmax.mask.nxv8i64.nxv8i64( @@ -2146,12 +2498,98 @@ ; RV32-LABEL: 
intrinsic_vmax_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vmax.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -442,6 +442,14 @@ define 
@vmaxu_vx_nxv128i8( %va, i8 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_nxv128i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) @@ -460,6 +468,12 @@ ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i8 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1050,6 +1064,14 @@ define @vmaxu_vx_nxv32i32( %va, i32 %b, %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: srli a3, a2, 2 @@ -1069,6 +1091,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1112,6 +1140,14 @@ define @vmaxu_vx_nxv32i32_evl_nx8( %va, i32 %b, %m) { ; CHECK-LABEL: vmaxu_vx_nxv32i32_evl_nx8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -1131,6 +1167,12 @@ ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vmaxu.vx v8, v8, a0, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer @@ -1159,6 +1201,14 @@ ; ; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx16: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 
0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: srli a2, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -1169,6 +1219,12 @@ ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %elt.head = insertelement poison, i32 %b, i32 0 %vb = shufflevector %elt.head, poison, zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vmaxu.nxv1i8.nxv1i8( , @@ -318,9 +318,97 @@ define @intrinsic_vmaxu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vmaxu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmaxu.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ define @intrinsic_vmaxu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vmaxu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload 
+; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmaxu.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define @intrinsic_vmaxu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vmaxu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmaxu.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define @intrinsic_vmaxu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmaxu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vmaxu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmaxu.mask.nxv8i64.nxv8i64( @@ -2146,12 +2498,98 @@ ; RV32-LABEL: intrinsic_vmaxu_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vmaxu.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll
@@ -440,6 +440,14 @@
 define <vscale x 128 x i8> @vmin_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmin_vx_nxv128i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v0, (a1)
@@ -458,6 +466,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1048,6 +1062,14 @@
 define <vscale x 32 x i32> @vmin_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vmin_vx_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: srli a3, a2, 2
@@ -1067,6 +1089,12 @@
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -1110,6 +1138,14 @@
 define <vscale x 32 x i32> @vmin_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
 ; CHECK-LABEL: vmin_vx_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
@@ -1129,6 +1165,12 @@
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmin.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -1157,6 +1199,14 @@
 ;
 ; RV64-LABEL: vmin_vx_nxv32i32_evl_nx16:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: srli a2, a1, 2
 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
@@ -1167,6 +1217,12 @@
 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vmin.vx v16, v16, a0, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin.ll b/llvm/test/CodeGen/RISCV/rvv/vmin.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmin.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmin.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vmin.nxv1i8.nxv1i8(
 <vscale x 1 x i8>,
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vmin_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i8> @llvm.riscv.vmin.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@
 define <vscale x 32 x i16> @intrinsic_vmin_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 32 x i16> @llvm.riscv.vmin.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@
 define <vscale x 16 x i32> @intrinsic_vmin_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i32> @llvm.riscv.vmin.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@
 define <vscale x 8 x i64> @intrinsic_vmin_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmin_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vmin.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i64> @llvm.riscv.vmin.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
 ; RV32-LABEL: intrinsic_vmin_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmin.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll
@@ -442,6 +442,14 @@
 define <vscale x 128 x i8> @vminu_vx_nxv128i8(<vscale x 128 x i8> %va, i8 %b, <vscale x 128 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vminu_vx_nxv128i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v0, (a1)
@@ -460,6 +468,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 128 x i8> poison, i8 %b, i32 0
 %vb = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -1050,6 +1064,14 @@
 define <vscale x 32 x i32> @vminu_vx_nxv32i32(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vminu_vx_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a2, vlenb
 ; CHECK-NEXT: srli a3, a2, 2
@@ -1069,6 +1091,12 @@
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -1112,6 +1140,14 @@
 define <vscale x 32 x i32> @vminu_vx_nxv32i32_evl_nx8(<vscale x 32 x i32> %va, i32 %b, <vscale x 32 x i1> %m) {
 ; CHECK-LABEL: vminu_vx_nxv32i32_evl_nx8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
@@ -1131,6 +1167,12 @@
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
@@ -1159,6 +1201,14 @@
 ;
 ; RV64-LABEL: vminu_vx_nxv32i32_evl_nx16:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: sub sp, sp, a1
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: srli a2, a1, 2
 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma
@@ -1169,6 +1219,12 @@
 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vminu.vx v16, v16, a0, v0.t
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %elt.head = insertelement <vscale x 32 x i32> poison, i32 %b, i32 0
 %vb = shufflevector <vscale x 32 x i32> %elt.head, <vscale x 32 x i32> poison, <vscale x 32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu.ll b/llvm/test/CodeGen/RISCV/rvv/vminu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vminu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vminu.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vminu.nxv1i8.nxv1i8(
 <vscale x 1 x i8>,
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vminu_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vminu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i8> @llvm.riscv.vminu.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@
 define <vscale x 32 x i16> @intrinsic_vminu_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vminu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 32 x i16> @llvm.riscv.vminu.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@
 define <vscale x 16 x i32> @intrinsic_vminu_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vminu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i32> @llvm.riscv.vminu.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@
 define <vscale x 8 x i64> @intrinsic_vminu_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vminu_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vminu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i64> @llvm.riscv.vminu.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
 ; RV32-LABEL: intrinsic_vminu_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vminu.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmsbc.borrow.in.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i1> @llvm.riscv.vmsbc.borrow.in.nxv1i8.nxv1i8(
 <vscale x 1 x i8>,
@@ -150,9 +150,21 @@
 define <vscale x 64 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv64i1_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv64i1_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT: vmsbc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i1> @llvm.riscv.vmsbc.borrow.in.nxv64i8.nxv64i8(
@@ -288,9 +300,21 @@
 define <vscale x 32 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv32i1_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv32i1_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT: vmsbc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 32 x i1> @llvm.riscv.vmsbc.borrow.in.nxv32i16.nxv32i16(
@@ -403,9 +427,21 @@
 define <vscale x 16 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv16i1_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmsbc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 16 x i1> @llvm.riscv.vmsbc.borrow.in.nxv16i32.nxv16i32(
@@ -495,9 +531,21 @@
 define <vscale x 8 x i1> @intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i1> %2, iXLen %3) nounwind {
 ; CHECK-LABEL: intrinsic_vmsbc.borrow.in_vvm_nxv8i1_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vmsbc.vvm v24, v8, v16, v0
 ; CHECK-NEXT: vmv1r.v v0, v24
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 8 x i1> @llvm.riscv.vmsbc.borrow.in.nxv8i64.nxv8i64(
@@ -1041,6 +1089,49 @@
 ; RV32-LABEL: intrinsic_vmsbc.borrow.in_vxm_nxv8i1_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -1048,6 +1139,49 @@
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmsbc.vvm v16, v8, v24, v0
 ; RV32-NEXT: vmv1r.v v0, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmul.ll b/llvm/test/CodeGen/RISCV/rvv/vmul.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmul.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmul.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zve64d \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+zve64d \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zve64d \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+zve64d \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vmul.nxv1i8.nxv1i8(
@@ -316,9 +316,97 @@
 define <vscale x 64 x i8> @intrinsic_vmul_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmul.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call <vscale x 64 x i8> @llvm.riscv.vmul.mask.nxv64i8.nxv64i8(
@@ -593,9 +681,97 @@
 define <vscale x 32 x i16> @intrinsic_vmul_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmul_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmul.mask.nxv16i32.nxv16i32( @@ -1009,9 +1273,97 @@ define @intrinsic_vmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vmul_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: 
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vmul.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmul.mask.nxv8i64.nxv8i64(
@@ -2107,12 +2459,98 @@
 ; RV32-LABEL: intrinsic_vmul_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmul.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: sed 's/iXLen/i32/g' %s | not --crash llc -mtriple=riscv32 \
-; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
+; RUN: -mattr=+m,+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
 ; RUN: sed 's/iXLen/i64/g' %s | not --crash llc -mtriple=riscv64 \
-; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
+; RUN: -mattr=+m,+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D

 ; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vmulh
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vmulh_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmulh.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vmulh.mask.nxv64i8.nxv64i8(
@@ -595,9 +683,97 @@
 define <vscale x 32 x i16> @intrinsic_vmulh_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vmulh.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vmulh.mask.nxv32i16.nxv32i16(
@@ -826,9 +1002,97 @@
 define <vscale x 16 x i32> @intrinsic_vmulh_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vmulh.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vmulh.mask.nxv16i32.nxv16i32(
@@ -1011,9 +1275,97 @@
 define <vscale x 8 x i64> @intrinsic_vmulh_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulh_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vmulh.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmulh.mask.nxv8i64.nxv8i64(
@@ -2109,12 +2461,98 @@
 ; RV32-LABEL: intrinsic_vmulh_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmulh.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhsu.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: sed 's/iXLen/i32/g' %s | not --crash llc -mtriple=riscv32 \
-; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
+; RUN: -mattr=+m,+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
 ; RUN: sed 's/iXLen/i64/g' %s | not --crash llc -mtriple=riscv64 \
-; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
+; RUN: -mattr=+m,+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D

 ; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vmulhsu
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vmulhsu_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmulhsu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vmulhsu.mask.nxv64i8.nxv64i8(
@@ -595,9 +683,97 @@
 define <vscale x 32 x i16> @intrinsic_vmulhsu_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vmulhsu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vmulhsu.mask.nxv32i16.nxv32i16(
@@ -826,9 +1002,97 @@
 define <vscale x 16 x i32> @intrinsic_vmulhsu_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vmulhsu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vmulhsu.mask.nxv16i32.nxv16i32(
@@ -1011,9 +1275,97 @@
 define <vscale x 8 x i64> @intrinsic_vmulhsu_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhsu_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vmulhsu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmulhsu.mask.nxv8i64.nxv8i64(
@@ -2109,12 +2461,98 @@
 ; RV32-LABEL: intrinsic_vmulhsu_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmulhsu.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 ; RUN: sed 's/iXLen/i32/g' %s | not --crash llc -mtriple=riscv32 \
-; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
+; RUN: -mattr=+m,+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
 ; RUN: sed 's/iXLen/i64/g' %s | not --crash llc -mtriple=riscv64 \
-; RUN: -mattr=+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D
+; RUN: -mattr=+m,+zve64d 2>&1 | FileCheck %s --check-prefixes=ZVE64D

 ; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vmulhu
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vmulhu_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vmulhu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vmulhu.mask.nxv64i8.nxv64i8(
@@ -595,9 +683,97 @@
 define <vscale x 32 x i16> @intrinsic_vmulhu_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vmulhu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vmulhu.mask.nxv32i16.nxv32i16(
@@ -826,9 +1002,97 @@
 define <vscale x 16 x i32> @intrinsic_vmulhu_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vmulhu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vmulhu.mask.nxv16i32.nxv16i32(
@@ -1011,9 +1275,97 @@
 define <vscale x 8 x i64> @intrinsic_vmulhu_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vmulhu_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vmulhu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vmulhu.mask.nxv8i64.nxv8i64(
@@ -2109,12 +2461,98 @@
 ; RV32-LABEL: intrinsic_vmulhu_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vmulhu.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vnmsac-vp.ll @@ -618,10 +618,100 @@ define @vnmsac_vv_nxv64i8( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv64i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; 
CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -634,10 +724,100 @@ define @vnmsac_vv_nxv64i8_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv64i8_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma ; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -684,10 +864,100 @@ define @vnmsac_vv_nxv64i8_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv64i8_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: 
csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1227,10 +1497,100 @@ define @vnmsac_vv_nxv32i16( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, 
vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1243,10 +1603,100 @@ define @vnmsac_vv_nxv32i16_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv32i16_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: 
slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, tu, ma ; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1293,10 +1743,100 @@ define @vnmsac_vv_nxv32i16_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv32i16_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, 
a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1735,10 +2275,100 @@ define @vnmsac_vv_nxv16i32( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; 
CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1751,10 +2381,100 @@ define @vnmsac_vv_nxv16i32_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv16i32_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, 
sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -1801,10 +2521,100 @@ define @vnmsac_vv_nxv16i32_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv16i32_ta: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, 
a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2277,10 +3087,100 @@ define @vnmsac_vv_nxv8i64( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 
0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2293,10 +3193,100 @@ define @vnmsac_vv_nxv8i64_unmasked( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv8i64_unmasked: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vnmsac.vv v24, v8, v16 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = shufflevector %splat, poison, zeroinitializer @@ -2311,6 +3301,50 @@ ; RV32: # %bb.0: ; 
RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2319,6 +3353,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, mu ; RV32-NEXT: vnmsac.vv v16, v8, v24, v0.t ; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2343,6 +3420,50 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 @@ -2351,6 +3472,49 @@ ; RV32-NEXT: vsetvli zero, a2, e64, m8, tu, ma ; RV32-NEXT: vnmsac.vv v16, v8, v24 ; RV32-NEXT: vmv8r.v v8, v16 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; @@ -2373,10 +3537,100 @@ define @vnmsac_vv_nxv8i64_ta( %a, %b, %c, %m, i32 zeroext %evl) { ; CHECK-LABEL: vnmsac_vv_nxv8i64_ta: ; CHECK: # 
%bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vnmsac.vv v24, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %splat = insertelement poison, i1 -1, i32 0 %allones = 
shufflevector %splat, poison, zeroinitializer
@@ -2391,6 +3645,50 @@
 ; RV32: # %bb.0:
 ; RV32-NEXT: addi sp, sp, -16
 ; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
@@ -2399,6 +3697,49 @@
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vnmsac.vv v16, v8, v24, v0.t
 ; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnmsub-sdnode.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -target-abi=ilp32 \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -target-abi=ilp32 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -target-abi=lp64 \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -target-abi=lp64 \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64

 ; This tests a mix of vmacc and vmsub by using different operand orders to
@@ -154,9 +154,99 @@
 define <vscale x 64 x i8> @vnmsub_vv_nxv64i8(<vscale x 64 x i8> %va, <vscale x 64 x i8> %vb, <vscale x 64 x i8> %vc) {
 ; CHECK-LABEL: vnmsub_vv_nxv64i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vnmsac.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %x = mul <vscale x 64 x i8> %vc, %vb
 %y = sub <vscale x 64 x i8> %va, %x
@@ -299,9 +389,99 @@
 define <vscale x 32 x i16> @vnmsub_vv_nxv32i16(<vscale x 32 x i16> %va, <vscale x 32 x i16> %vb, <vscale x 32 x i16> %vc) {
 ; CHECK-LABEL: vnmsub_vv_nxv32i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vnmsac.vv v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %x = mul <vscale x 32 x i16> %vc, %vb
 %y = sub <vscale x 32 x i16> %va, %x
@@ -420,9 +600,99 @@
 define <vscale x 16 x i32> @vnmsub_vv_nxv16i32(<vscale x 16 x i32> %va, <vscale x 16 x i32> %vb, <vscale x 16 x i32> %vc) {
 ; CHECK-LABEL: vnmsub_vv_nxv16i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vnmsub.vv v8, v24, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp,
a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %x = mul %vc, %va %y = sub %vb, %x @@ -556,9 +826,99 @@ define @vnmsub_vv_nxv8i64( %va, %vb, %vc) { ; CHECK-LABEL: vnmsub_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vnmsac.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %x = mul %vb, %vc %y = sub %va, %x @@ -570,12 +930,99 @@ ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vnmsac.vv v8, v16, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli 
a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vor.ll b/llvm/test/CodeGen/RISCV/rvv/vor.ll --- a/llvm/test/CodeGen/RISCV/rvv/vor.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vor.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vor.nxv1i8.nxv1i8( , @@ -318,9 +318,97 @@ define @intrinsic_vor_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vor.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; 
CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vor.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ define @intrinsic_vor_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vor.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub 
a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vor.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define @intrinsic_vor_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vor.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vor.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define @intrinsic_vor_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vor_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vor.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add 
a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vor.mask.nxv8i64.nxv8i64( @@ -2146,12 +2498,98 @@ ; RV32-LABEL: intrinsic_vor_mask_vx_nxv8i64_nxv8i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; RV32-NEXT: vlse64.v v24, (a0), zero ; RV32-NEXT: vor.vv v8, v16, v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # 
Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV32 -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=RV64 declare @llvm.vp.gather.nxv1i8.nxv1p0(, , i32) @@ -2279,6 +2279,52 @@ define @vpgather_nxv16f64( %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: sub sp, sp, a1 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 3 +; RV32-NEXT: sub a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a2, 6 +; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 2 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a2, a1, 1 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, 
a1, 1 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: sub a2, a0, a1 @@ -2298,10 +2344,62 @@ ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t ; RV32-NEXT: vmv.v.v v8, v24 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 1 +; RV64-NEXT: sub sp, sp, a1 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v24, v0 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: sub a2, a0, a1 @@ -2320,6 +2418,12 @@ ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v24 ; RV64-NEXT: vluxei64.v v8, (zero), v8, v0.t +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %v = call @llvm.vp.gather.nxv16f64.nxv16p0( %ptrs, %m, i32 %evl) ret %v @@ -2328,6 +2432,52 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; 
RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 @@ -2349,10 +2499,100 @@ ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * 
vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v8 @@ -2376,6 +2616,50 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs %v = call @llvm.vp.gather.nxv16f64.nxv16p0( %ptrs, %m, i32 %evl) @@ -2385,6 +2669,52 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: 
.cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v16, v8 @@ -2406,10 +2736,100 @@ ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; 
RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v10 @@ -2433,6 +2853,50 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei64.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2443,6 +2907,52 
@@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v12, v0 ; RV32-NEXT: vsetvli a2, zero, e32, m8, ta, ma ; RV32-NEXT: vzext.vf2 v16, v8 @@ -2464,10 +2974,77 @@ ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; 
RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsetvli a2, zero, e32, m4, ta, ma ; RV64-NEXT: vzext.vf2 v16, v8 @@ -2491,6 +3068,27 @@ ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v0, v12 ; RV64-NEXT: vluxei32.v v8, (a0), v24, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v \ ; RUN: -verify-machineinstrs < %s | FileCheck %s declare @llvm.vp.load.nxv1i8.p0(*, , i32) @@ -519,6 +519,52 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB38_4: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a5, a5, 3 +; CHECK-NEXT: sub sp, sp, a5 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: slli a6, a5, 3 +; CHECK-NEXT: sub a5, a6, a5 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 +; CHECK-NEXT: vs1r.v v24, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a5, vlenb +; CHECK-NEXT: li a6, 6 +; CHECK-NEXT: mul a5, a5, a6 +; CHECK-NEXT: add a5, sp, a5 +; CHECK-NEXT: addi a5, a5, 16 
+; CHECK-NEXT: vs1r.v v25, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a6, a5, 2
+; CHECK-NEXT: add a5, a6, a5
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs1r.v v26, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 2
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs1r.v v27, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a6, a5, 1
+; CHECK-NEXT: add a5, a6, a5
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs1r.v v28, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: slli a5, a5, 1
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs1r.v v29, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
+; CHECK-NEXT: vs1r.v v30, (a5) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a5) # Unknown-size Folded Spill
 ; CHECK-NEXT: srli a5, a3, 2
 ; CHECK-NEXT: vsetvli a6, zero, e8, mf2, ta, ma
 ; CHECK-NEXT: vslidedown.vx v0, v8, a5
@@ -534,6 +580,50 @@
 ; CHECK-NEXT: vmv1r.v v0, v8
 ; CHECK-NEXT: vle64.v v8, (a0), v0.t
 ; CHECK-NEXT: vs1r.v v24, (a1)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %load = call <vscale x 17 x double> @llvm.vp.load.nxv17f64.p0(<vscale x 17 x double>* %ptr, <vscale x 17 x i1> %m, i32 %evl)
 %lo = call <vscale x 16 x double> @llvm.vector.extract.nxv16f64(<vscale x 17 x double> %load, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll
@@ -373,9 +373,62 @@
 ; CHECK-NEXT: addi sp, sp, -16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: li a4, 18
+; CHECK-NEXT: mul a1, a1, a4
 ; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 18 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a4, a1, 4
+; CHECK-NEXT: sub a1, a4, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a4, 14
+; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a4, 13
+; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a4, 12
+; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a4, 11
+; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a4, 10
+; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a4, a1, 3
+; CHECK-NEXT: add a1, a4, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v1, v0
 ; CHECK-NEXT: vmv8r.v v24, v16
 ; CHECK-NEXT: addi a1, sp, 16
@@ -403,7 +456,60 @@
 ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vmerge.vvm v8, v8, v24, v0
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 18
+; CHECK-NEXT: mul a0, a0, a1
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
@@ -414,6 +520,14 @@
 define <vscale x 128 x i8> @vpmerge_vx_nxv128i8(i8 %a, <vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpmerge_vx_nxv128i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 1
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a3, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a3, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v0, (a1)
@@ -432,6 +546,12 @@
 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 128 x i8> poison, i8 %a, i32 0
 %va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
@@ -442,6 +562,14 @@
 define <vscale x 128 x i8> @vpmerge_vi_nxv128i8(<vscale x 128 x i8> %vb, <vscale x 128 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpmerge_vi_nxv128i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: vsetvli a2, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vlm.v v0, (a0)
@@ -460,6 +588,12 @@
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %elt.head = insertelement <vscale x 128 x i8> poison, i8 2, i32 0
 %va = shufflevector <vscale x 128 x i8> %elt.head, <vscale x 128 x i8> poison, <vscale x 128 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll
@@ -794,11 +794,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf8 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i8> %idxs
 call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -817,11 +907,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf8 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i8> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -865,11 +1045,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf4 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i16> %idxs
 call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -888,11 +1158,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf4 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i16> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -935,11 +1295,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf2 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i32> %idxs
 call void @llvm.vp.scatter.nxv8i64.nxv8p0(<vscale x 8 x i64> %val, <vscale x 8 x ptr> %ptrs, <vscale x 8 x i1> %m, i32 %evl)
@@ -957,11 +1407,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf2 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -980,11 +1520,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8i64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vzext.vf2 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = zext <vscale x 8 x i32> %idxs to <vscale x 8 x i64>
 %ptrs = getelementptr inbounds i64, ptr %base, <vscale x 8 x i64> %eidxs
@@ -995,11 +1625,55 @@
 define void @vpscatter_baseidx_nxv8i64(<vscale x 8 x i64> %val, ptr %base, <vscale x 8 x i64> %idxs, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_nxv8i64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 2
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a3, a2, 1
+; RV32-NEXT: add a2, a3, a2
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT: vnsrl.wi v24, v16, 0
 ; RV32-NEXT: vsll.vi v16, v24, 3
 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv8i64:
@@ -1551,11 +2225,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i8_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf8 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i8> %idxs
 call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
@@ -1574,11 +2338,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i8_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf8 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 6 x i8> %idxs to <vscale x 6 x i64>
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
@@ -1622,11 +2476,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i16_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf4 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i16> %idxs
 call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
@@ -1645,11 +2589,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i16_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf4 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 6 x i16> %idxs to <vscale x 6 x i64>
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
@@ -1692,11 +2726,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_nxv6i32_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf2 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i32> %idxs
 call void @llvm.vp.scatter.nxv6f64.nxv6p0(<vscale x 6 x double> %val, <vscale x 6 x ptr> %ptrs, <vscale x 6 x i1> %m, i32 %evl)
@@ -1714,11 +2838,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_sext_nxv6i32_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 2
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 1
+; RV64-NEXT: add a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
 ; RV64-NEXT: vsext.vf2 v24, v16
 ; RV64-NEXT: vsll.vi v16, v24, 3
 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
 %eidxs = sext <vscale x 6 x i32> %idxs to <vscale x 6 x i64>
 %ptrs = getelementptr inbounds double, ptr %base, <vscale x 6 x i64> %eidxs
@@ -1737,11 +2951,101 @@
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv6i32_nxv6f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 3
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a3, a2, 3
+; RV64-NEXT: sub a2, a3, a2
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: li a3, 6
+; RV64-NEXT: mul a2, a2, a3
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a2, vlenb
a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1752,11 +3056,55 @@ define void @vpscatter_baseidx_nxv6f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv6f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 
16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vsll.vi v16, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv6f64: @@ -1801,11 +3149,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_nxv8i8_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; 
RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.vp.scatter.nxv8f64.nxv8p0( %val, %ptrs, %m, i32 %evl) @@ -1824,11 +3262,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i8_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf8 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; 
RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1872,11 +3400,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_nxv8i16_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; 
RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.vp.scatter.nxv8f64.nxv8p0( %val, %ptrs, %m, i32 %evl) @@ -1895,11 +3513,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i16_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v 
v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1942,11 +3650,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; 
RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %ptrs = getelementptr inbounds double, ptr %base, %idxs call void @llvm.vp.scatter.nxv8f64.nxv8p0( %val, %ptrs, %m, i32 %evl) @@ -1964,11 +3762,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; 
RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = sext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -1987,11 +3875,101 @@ ; ; RV64-LABEL: vpscatter_baseidx_zext_nxv8i32_nxv8f64: ; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 3 +; RV64-NEXT: sub sp, sp, a2 +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: sub a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 6 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 2 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a3, a2, 1 +; RV64-NEXT: add a2, a3, a2 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 1 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a2, sp, 
16 +; RV64-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vzext.vf2 v24, v16 ; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 6 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 2 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 2 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 1 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add sp, sp, a0 +; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret %eidxs = zext %idxs to %ptrs = getelementptr inbounds double, ptr %base, %eidxs @@ -2002,11 +3980,55 @@ define void @vpscatter_baseidx_nxv8f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv8f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli a2, zero, e32, m4, ta, ma ; RV32-NEXT: vnsrl.wi v24, v16, 0 ; RV32-NEXT: vsll.vi v16, v24, 3 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded 
Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv8f64: @@ -2026,6 +4048,52 @@ define void @vpscatter_nxv16f64( %val, %ptrs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: sub sp, sp, a2 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 3 +; RV32-NEXT: sub a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 6 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 2 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a3, a2, 1 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 1 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vl8re32.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: mv a2, a1 @@ -2044,6 +4112,50 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; 
RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_nxv16f64: @@ -2051,9 +4163,56 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: sub sp, sp, a1 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 4 +; RV64-NEXT: sub a1, a3, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 14 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 13 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 12 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 11 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 10 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a3, a1, 3 +; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a3, a1, 3 ; RV64-NEXT: add a3, a0, a3 @@ -2080,7 +4239,54 @@ ; RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v16, (zero), v8, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: sub a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 14 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 13 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 12 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; 
RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 11 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 10 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 3 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2091,6 +4297,76 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 12 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 12 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 11 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 10 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # 
Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vl4re16.v v4, (a1) ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v24, v4 @@ -2112,6 +4388,74 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: @@ -2119,9 +4463,57 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: li a4, 24 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 23 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 22 +; RV64-NEXT: mul 
a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 21 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 20 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 19 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 18 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl4re16.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 @@ -2160,7 +4552,55 @@ ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 23 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 22 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 21 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 20 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 19 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 18 +; RV64-NEXT: mul a0, a0, a1 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a1, a0, 4 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: addi a0, a0, 16 +; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: li a1, 24 +; RV64-NEXT: mul a0, a0, a1 ; RV64-NEXT: add sp, sp, a0 ; RV64-NEXT: addi sp, sp, 16 ; RV64-NEXT: ret @@ -2172,6 +4612,76 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %val, ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: csrr 
a3, vlenb +; RV32-NEXT: li a4, 12 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: sub sp, sp, a3 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 12 * vlenb +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 11 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 10 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 3 +; RV32-NEXT: sub a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 6 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 2 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 2 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a4, a3, 1 +; RV32-NEXT: add a3, a4, a3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 1 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vl4re16.v v4, (a1) ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; RV32-NEXT: vsext.vf2 v24, v4 @@ -2193,6 +4703,74 @@ ; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 11 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 10 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 3 +; RV32-NEXT: sub a0, a1, a0 +; RV32-NEXT: 
add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 6 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 2 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a1, a0, 1 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 16 +; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 12 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add sp, sp, a0 +; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64: @@ -2200,9 +4778,57 @@ ; RV64-NEXT: addi sp, sp, -16 ; RV64-NEXT: .cfi_def_cfa_offset 16 ; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: li a4, 24 +; RV64-NEXT: mul a3, a3, a4 ; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 23 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 22 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 21 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 20 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 19 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 18 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a4, a3, 4 +; RV64-NEXT: add a3, a4, a3 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; 
RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT: vl4re16.v v24, (a1)
 ; RV64-NEXT: csrr a1, vlenb
 ; RV64-NEXT: slli a1, a1, 3
@@ -2241,7 +4867,55 @@
 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
 ; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t
 ; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 23
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 22
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 21
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 20
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 19
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 18
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 4
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
 ; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 24
+; RV64-NEXT: mul a0, a0, a1
 ; RV64-NEXT: add sp, sp, a0
 ; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
@@ -2254,6 +4928,76 @@
 define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64(<vscale x 16 x double> %val, ptr %base, <vscale x 16 x i16> %idxs, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 12
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 12 * vlenb
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 11
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 10
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: vl4re16.v v4, (a1)
 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
 ; RV32-NEXT: vzext.vf2 v24, v4
@@ -2275,10 +5019,148 @@
 ; RV32-NEXT: vslidedown.vx v0, v0, a1
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 11
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 10
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 12
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 12
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: sub sp, sp, a3
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 12 * vlenb
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 11
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 10
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: add a3, a4, a3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a4, a3, 3
+; RV64-NEXT: sub a3, a4, a3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: li a4, 6
+; RV64-NEXT: mul a3, a3, a4
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a4, a3, 2
+; RV64-NEXT: add a3, a4, a3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 2
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a4, a3, 1
+; RV64-NEXT: add a3, a4, a3
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: slli a3, a3, 1
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a3, vlenb
+; RV64-NEXT: add a3, sp, a3
+; RV64-NEXT: addi a3, a3, 16
+; RV64-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV64-NEXT: addi a3, sp, 16
+; RV64-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT: vl4re16.v v28, (a1)
 ; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
 ; RV64-NEXT: vzext.vf2 v24, v30
@@ -2302,6 +5184,74 @@
 ; RV64-NEXT: vslidedown.vx v0, v0, a1
 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT: vsoxei32.v v16, (a0), v24, v0.t
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 11
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 10
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 12
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
   %eidxs = zext <vscale x 16 x i16> %idxs to <vscale x 16 x i64>
   %ptrs = getelementptr inbounds double, ptr %base, <vscale x 16 x i64> %eidxs
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+d,+zfh,+zvfh,+v \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+d,+zfh,+zvfh,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+d,+zfh,+zvfh,+v \
+; RUN: llc -mtriple=riscv64 -mattr=+m,+d,+zfh,+zvfh,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
 declare void @llvm.vp.store.nxv1i8.p0(<vscale x 1 x i8>, <vscale x 1 x i8>*, <vscale x 1 x i1>, i32)
@@ -400,6 +400,59 @@
 define void @vpstore_nxv17f64(<vscale x 17 x double> %val, <vscale x 17 x double>* %ptr, <vscale x 17 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpstore_nxv17f64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 4
+; CHECK-NEXT: sub sp, sp, a3
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 4
+; CHECK-NEXT: sub a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v1, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: li a4, 14
+; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v2, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: li a4, 13
+; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v3, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: li a4, 12
+; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v4, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: li a4, 11
+; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v5, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: li a4, 10
+; CHECK-NEXT: mul a3, a3, a4
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v6, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a4, a3, 3
+; CHECK-NEXT: add a3, a4, a3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v7, (a3) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a3, vlenb
+; CHECK-NEXT: slli a3, a3, 3
+; CHECK-NEXT: add a3, sp, a3
+; CHECK-NEXT: addi a3, a3, 16
+; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a3, vlenb
 ; CHECK-NEXT: slli a4, a3, 1
 ; CHECK-NEXT: vmv1r.v v24, v0
@@ -413,12 +466,6 @@
 ; CHECK-NEXT: # %bb.3:
 ; CHECK-NEXT: mv a6, a3
 ; CHECK-NEXT: .LBB31_4:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a7, vlenb
-; CHECK-NEXT: slli a7, a7, 3
-; CHECK-NEXT: sub sp, sp, a7
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
 ; CHECK-NEXT: vl8re64.v v0, (a0)
 ; CHECK-NEXT: addi a0, sp, 16
 ; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill
@@ -454,7 +501,54 @@
 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; CHECK-NEXT: vse64.v v8, (a1), v0.t
 ; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 4
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 14
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 13
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 12
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 11
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 10
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
 ; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 4
 ; CHECK-NEXT: add sp, sp, a0
 ; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp.ll
@@ -93,6 +93,18 @@
 define half @vpreduce_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_fadd_nxv64f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 1
 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
@@ -115,6 +127,16 @@
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfredusum.vs v25, v16, v25, v0.t
 ; CHECK-NEXT: vfmv.f.s fa0, v25
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %r = call reassoc half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
@@ -123,6 +145,18 @@
 define half @vpreduce_ord_fadd_nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vpreduce_ord_fadd_nxv64f16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 1
 ; CHECK-NEXT: vsetvli a3, zero, e8, m1, ta, ma
@@ -145,6 +179,16 @@
 ; CHECK-NEXT: vmv1r.v v0, v24
 ; CHECK-NEXT: vfredosum.vs v25, v16, v25, v0.t
 ; CHECK-NEXT: vfmv.f.s fa0, v25
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %r = call half @llvm.vp.reduce.fadd.nxv64f16(half %s, <vscale x 64 x half> %v, <vscale x 64 x i1> %m, i32 %evl)
   ret half %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -1153,6 +1153,18 @@
 define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vpreduce_umax_nxv32i32:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: slli a2, a2, 1
+; RV32-NEXT: sub sp, sp, a2
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV32-NEXT: csrr a2, vlenb
+; RV32-NEXT: add a2, sp, a2
+; RV32-NEXT: addi a2, a2, 16
+; RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV32-NEXT: addi a2, sp, 16
+; RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT: csrr a2, vlenb
 ; RV32-NEXT: srli a3, a2, 2
 ; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
@@ -1175,10 +1187,32 @@
 ; RV32-NEXT: vmv1r.v v0, v24
 ; RV32-NEXT: vredmaxu.vs v25, v16, v25, v0.t
 ; RV32-NEXT: vmv.x.s a0, v25
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vl1r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: addi a1, sp, 16
+; RV32-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 1
+; RV32-NEXT: add sp, sp, a1
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vpreduce_umax_nxv32i32:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: slli a2, a2, 1
+; RV64-NEXT: sub sp, sp, a2
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; RV64-NEXT: csrr a2, vlenb
+; RV64-NEXT: add a2, sp, a2
+; RV64-NEXT: addi a2, a2, 16
+; RV64-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT: addi a2, sp, 16
+; RV64-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT: csrr a2, vlenb
 ; RV64-NEXT: srli a3, a2, 2
 ; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
@@ -1203,6 +1237,16 @@
 ; RV64-NEXT: vmv1r.v v0, v24
 ; RV64-NEXT: vredmaxu.vs v25, v16, v25, v0.t
 ; RV64-NEXT: vmv.x.s a0, v25
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: add a1, sp, a1
+; RV64-NEXT: addi a1, a1, 16
+; RV64-NEXT: vl1r.v v24, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: addi a1, sp, 16
+; RV64-NEXT: vl1r.v v25, (a1) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a1, vlenb
+; RV64-NEXT: slli a1, a1, 1
+; RV64-NEXT: add sp, sp, a1
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
   %r = call i32 @llvm.vp.reduce.umax.nxv32i32(i32 %s, <vscale x 32 x i32> %v, <vscale x 32 x i1> %m, i32 %evl)
   ret i32 %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
-; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define <vscale x 1 x i8> @vrem_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vrem_vv_nxv1i8:
@@ -275,6 +275,52 @@
 define <vscale x 64 x i8> @vrem_vi_nxv64i8_0(<vscale x 64 x i8> %va) {
 ; CHECK-LABEL: vrem_vi_nxv64i8_0:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a0, 109
 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vmulh.vx v16, v8, a0
@@ -284,6 +330,50 @@
 ; CHECK-NEXT: vadd.vv v16, v16, v24
 ; CHECK-NEXT: li a0, -7
 ; CHECK-NEXT: vnmsac.vx v8, a0, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 64 x i8> poison, i8 -7, i32 0
   %splat = shufflevector <vscale x 64 x i8> %head, <vscale x 64 x i8> poison, <vscale x 64 x i32> zeroinitializer
@@ -586,6 +676,52 @@
 define <vscale x 32 x i16> @vrem_vi_nxv32i16_0(<vscale x 32 x i16> %va) {
 ; RV32-LABEL: vrem_vi_nxv32i16_0:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a0, 1048571
 ; RV32-NEXT: addi a0, a0, 1755
 ; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
@@ -595,10 +731,100 @@
 ; RV32-NEXT: vadd.vv v16, v16, v24
 ; RV32-NEXT: li a0, -7
 ; RV32-NEXT: vnmsac.vx v8, a0, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vrem_vi_nxv32i16_0:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: sub sp, sp, a0
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a0, 1048571
 ; RV64-NEXT: addiw a0, a0, 1755
 ; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
@@ -608,6 +834,50 @@
 ; RV64-NEXT: vadd.vv v16, v16, v24
 ; RV64-NEXT: li a0, -7
 ; RV64-NEXT: vnmsac.vx v8, a0, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
   %head = insertelement <vscale x 32 x i16> poison, i16 -7, i32 0
   %splat = shufflevector <vscale x 32 x i16> %head, <vscale x 32 x i16> poison, <vscale x 32 x i32> zeroinitializer
@@ -864,6 +1134,52 @@
 define <vscale x 16 x i32> @vrem_vi_nxv16i32_0(<vscale x 16 x i32> %va) {
 ; RV32-LABEL: vrem_vi_nxv16i32_0:
 ; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT: lui a0, 449390
 ; RV32-NEXT: addi a0, a0, -1171
 ; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
@@ -874,10 +1190,100 @@
 ; RV32-NEXT: vadd.vv v16, v16, v24
 ; RV32-NEXT: li a0, -7
 ; RV32-NEXT: vnmsac.vx v8, a0, v16
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
+; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
 ; RV64-LABEL: vrem_vi_nxv16i32_0:
 ; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -16
+; RV64-NEXT: .cfi_def_cfa_offset 16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: sub sp, sp, a0
+; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT: lui a0, 449390
 ; RV64-NEXT: addiw a0, a0, -1171
 ; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
@@ -888,6 +1294,50 @@
 ; RV64-NEXT: vadd.vv v16, v16, v24
 ; RV64-NEXT: li a0, -7
 ; RV64-NEXT: vnmsac.vx v8, a0, v16
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 3
+; RV64-NEXT: sub a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: li a1, 6
+; RV64-NEXT: mul a0, a0, a1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 2
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a1, a0, 1
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: add a0, sp, a0
+; RV64-NEXT: addi a0, a0, 16
+; RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: addi a0, sp, 16
+; RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: add sp, sp, a0
+; RV64-NEXT: addi sp, sp, 16
 ; RV64-NEXT: ret
   %head = insertelement <vscale x 16 x i32> poison, i32 -7, i32 0
   %splat = shufflevector <vscale x 16 x i32> %head, <vscale x 16 x i32> poison, <vscale x 16 x i32> zeroinitializer
@@ -1190,6 +1640,50 @@
 ; RV32-V: # %bb.0:
 ; RV32-V-NEXT: addi sp, sp, -16
 ; RV32-V-NEXT: .cfi_def_cfa_offset 16
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a0, a0, 3
+; RV32-V-NEXT: sub sp, sp, a0
+; RV32-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a1, a0, 3
+; RV32-V-NEXT: sub a0, a1, a0
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: li a1, 6
+; RV32-V-NEXT: mul a0, a0, a1
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a1, a0, 2
+; RV32-V-NEXT: add a0, a1, a0
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a0, a0, 2
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a1, a0, 1
+; RV32-V-NEXT: add a0, a1, a0
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a0, a0, 1
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV32-V-NEXT: addi a0, sp, 16
+; RV32-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV32-V-NEXT: lui a0, 748983
 ; RV32-V-NEXT: addi a0, a0, -586
 ; RV32-V-NEXT: sw a0, 12(sp)
@@ -1206,6 +1700,49 @@
 ; RV32-V-NEXT: vadd.vv v16, v16, v24
 ; RV32-V-NEXT: li a0, -7
 ; RV32-V-NEXT: vnmsac.vx v8, a0, v16
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a1, a0, 3
+; RV32-V-NEXT: sub a0, a1, a0
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: li a1, 6
+; RV32-V-NEXT: mul a0, a0, a1
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a1, a0, 2
+; RV32-V-NEXT: add a0, a1, a0
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a0, a0, 2
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a1, a0, 1
+; RV32-V-NEXT: add a0, a1, a0
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a0, a0, 1
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: add a0, sp, a0
+; RV32-V-NEXT: addi a0, a0, 16
+; RV32-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: addi a0, sp, 16
+; RV32-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-V-NEXT: csrr a0, vlenb
+; RV32-V-NEXT: slli a0, a0, 3
+; RV32-V-NEXT: add sp, sp, a0
 ; RV32-V-NEXT: addi sp, sp, 16
 ; RV32-V-NEXT: ret
 ;
@@ -1218,6 +1755,52 @@
 ;
 ; RV64-V-LABEL: vrem_vi_nxv8i64_0:
 ; RV64-V: # %bb.0:
+; RV64-V-NEXT: addi sp, sp, -16
+; RV64-V-NEXT: .cfi_def_cfa_offset 16
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a0, a0, 3
+; RV64-V-NEXT: sub sp, sp, a0
+; RV64-V-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a1, a0, 3
+; RV64-V-NEXT: sub a0, a1, a0
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: li a1, 6
+; RV64-V-NEXT: mul a0, a0, a1
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a1, a0, 2
+; RV64-V-NEXT: add a0, a1, a0
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a0, a0, 2
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a1, a0, 1
+; RV64-V-NEXT: add a0, a1, a0
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a0, a0, 1
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; RV64-V-NEXT: addi a0, sp, 16
+; RV64-V-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; RV64-V-NEXT: lui a0, %hi(.LCPI65_0)
 ; RV64-V-NEXT: ld a0, %lo(.LCPI65_0)(a0)
 ; RV64-V-NEXT: vsetvli a1, zero, e64, m8, ta, ma
@@ -1228,6 +1811,50 @@
 ; RV64-V-NEXT: vadd.vv v16, v16, v24
 ; RV64-V-NEXT: li a0, -7
 ; RV64-V-NEXT: vnmsac.vx v8, a0, v16
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a1, a0, 3
+; RV64-V-NEXT: sub a0, a1, a0
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: li a1, 6
+; RV64-V-NEXT: mul a0, a0, a1
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a1, a0, 2
+; RV64-V-NEXT: add a0, a1, a0
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a0, a0, 2
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a1, a0, 1
+; RV64-V-NEXT: add a0, a1, a0
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a0, a0, 1
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: add a0, sp, a0
+; RV64-V-NEXT: addi a0, a0, 16
+; RV64-V-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: addi a0, sp, 16
+; RV64-V-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV64-V-NEXT: csrr a0, vlenb
+; RV64-V-NEXT: slli a0, a0, 3
+; RV64-V-NEXT: add sp, sp, a0
+; RV64-V-NEXT: addi sp, sp, 16
 ; RV64-V-NEXT: ret
   %head = insertelement <vscale x 8 x i64> poison, i64 -7, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrem.ll b/llvm/test/CodeGen/RISCV/rvv/vrem.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrem.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrem.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vrem.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vrem_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vrem.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vrem.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@
 define <vscale x 32 x i16> @intrinsic_vrem_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vrem.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vrem.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@
 define <vscale x 16 x i32> @intrinsic_vrem_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vrem.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vrem.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@
 define <vscale x 8 x i64> @intrinsic_vrem_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrem_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vrem.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vrem.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
 ; RV32-LABEL: intrinsic_vrem_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vrem.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
-; RUN: llc -mtriple=riscv32 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
-; RUN: llc -mtriple=riscv64 -mattr=+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-V
+; RUN: llc -mtriple=riscv32 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,ZVE64X
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-V
+; RUN: llc -mtriple=riscv64 -mattr=+m,+zve64x -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,ZVE64X
 
 define <vscale x 1 x i8> @vremu_vv_nxv1i8(<vscale x 1 x i8> %va, <vscale x 1 x i8> %vb) {
 ; CHECK-LABEL: vremu_vv_nxv1i8:
@@ -1250,12 +1250,102 @@
 define <vscale x 8 x i64> @vremu_vi_nxv8i64_2(<vscale x 8 x i64> %va, <vscale x 8 x i64> %vb) {
 ; CHECK-LABEL: vremu_vi_nxv8i64_2:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a0, 16
 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v24, a0
 ; CHECK-NEXT: vsll.vv v16, v24, v16
 ; CHECK-NEXT: vadd.vi v16, v16, -1
 ; CHECK-NEXT: vand.vv v8, v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %head = insertelement <vscale x 8 x i64> poison, i64 16, i32 0
   %splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vremu.ll b/llvm/test/CodeGen/RISCV/rvv/vremu.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vremu.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vremu.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare <vscale x 1 x i8> @llvm.riscv.vremu.nxv1i8.nxv1i8(
   <vscale x 1 x i8>,
@@ -318,9 +318,97 @@
 define <vscale x 64 x i8> @intrinsic_vremu_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vremu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vremu.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@
 define <vscale x 32 x i16> @intrinsic_vremu_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vremu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vremu.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@
 define <vscale x 16 x i32> @intrinsic_vremu_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vremu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vremu.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@
 define <vscale x 8 x i64> @intrinsic_vremu_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vremu_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vremu.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vremu.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
 ; RV32-LABEL: intrinsic_vremu_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vremu.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d,+zfh,+zvfh -verify-machineinstrs \
 ; RUN: < %s | FileCheck %s
 declare <vscale x 1 x i8> @llvm.riscv.vrgather.vv.nxv1i8.i32(
   <vscale x 1 x i8>,
@@ -298,9 +298,97 @@
 define <vscale x 64 x i8> @intrinsic_vrgather_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma
 ; CHECK-NEXT: vrgather.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vrgather.vv.nxv64i8.i32(
@@ -323,9 +411,97 @@
 define <vscale x 64 x i8> @intrinsic_vrgather_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vrgather.vv.mask.nxv64i8.i32(
@@ -587,9 +763,97 @@
 define <vscale x 32 x i16> @intrinsic_vrgather_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT: vrgather.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vrgather.vv.nxv32i16.i32(
@@ -612,9 +876,97 @@
 define <vscale x 32 x i16> @intrinsic_vrgather_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vrgather.vv.mask.nxv32i16.i32(
@@ -828,9 +1180,97 @@
 define <vscale x 16 x i32> @intrinsic_vrgather_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vrgather.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vrgather.vv.nxv16i32.i32(
@@ -853,9 +1293,97 @@
 define <vscale x 16 x i32> @intrinsic_vrgather_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vrgather.vv.mask.nxv16i32.i32(
@@ -1117,9 +1645,97 @@
 define <vscale x 32 x half> @intrinsic_vrgather_vv_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_vv_nxv32f16_nxv32f16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma
 ; CHECK-NEXT: vrgather.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vrgather.vv.nxv32f16.i32(
@@ -1142,9 +1758,97 @@
 define <vscale x 32 x half> @intrinsic_vrgather_mask_vv_nxv32f16_nxv32f16_nxv32i16(<vscale x 32 x half> %0, <vscale x 32 x half> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv32f16_nxv32f16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x half> @llvm.riscv.vrgather.vv.mask.nxv32f16.i32(
@@ -1358,9 +2062,97 @@
 define <vscale x 16 x float> @intrinsic_vrgather_vv_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x i32> %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_vv_nxv16f32_nxv16f32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vrgather.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vrgather.vv.nxv16f32.i32(
@@ -1383,9 +2175,97 @@
 define <vscale x 16 x float> @intrinsic_vrgather_mask_vv_nxv16f32_nxv16f32_nxv16i32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv16f32_nxv16f32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vrgather.vv.mask.nxv16f32.i32(
@@ -1551,9 +2431,97 @@
 define <vscale x 8 x double> @intrinsic_vrgather_vv_nxv8f64_nxv8f64_nxv8i64(<vscale x 8 x double> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgather_vv_nxv8f64_nxv8f64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vrgather.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+;
CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv8f64.i32( @@ -1576,9 +2544,97 @@ define @intrinsic_vrgather_mask_vv_nxv8f64_nxv8f64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv8f64_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; 
CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv8f64.i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgather-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d,+zfh,+zvfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrgather.vv.nxv1i8.i64( , @@ -298,9 +298,97 @@ define @intrinsic_vrgather_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, 
a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv64i8.i64( @@ -323,9 +411,97 @@ define @intrinsic_vrgather_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; 
CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv64i8.i64( @@ -587,9 +763,97 @@ define @intrinsic_vrgather_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 
+; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv32i16.i64( @@ -612,9 +876,97 @@ define @intrinsic_vrgather_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv32i16.i64( @@ -828,9 +1180,97 @@ define @intrinsic_vrgather_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 
16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv16i32.i64( @@ -853,9 +1293,97 @@ define @intrinsic_vrgather_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv16i32.i64( @@ -1021,9 +1549,97 @@ define @intrinsic_vrgather_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv8i64.i64( @@ -1046,9 +1662,97 @@ define @intrinsic_vrgather_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv8i64.i64( @@ -1310,9 +2014,97 @@ define @intrinsic_vrgather_vv_nxv32f16_nxv32f16_nxv32i16( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv32f16_nxv32f16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv32f16.i64( @@ -1335,9 +2127,97 @@ define @intrinsic_vrgather_mask_vv_nxv32f16_nxv32f16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv32f16_nxv32f16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded 
Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv32f16.i64( @@ -1551,9 +2431,97 @@ define @intrinsic_vrgather_vv_nxv16f32_nxv16f32_nxv16i32( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv16f32_nxv16f32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv16f32.i64( @@ -1576,9 +2544,97 @@ define @intrinsic_vrgather_mask_vv_nxv16f32_nxv16f32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv16f32_nxv16f32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv16f32.i64( @@ -1744,9 +2800,97 @@ define @intrinsic_vrgather_vv_nxv8f64_nxv8f64_nxv8i64( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgather_vv_nxv8f64_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrgather.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.nxv8f64.i64( @@ -1769,9 +2913,97 @@ define @intrinsic_vrgather_mask_vv_nxv8f64_nxv8f64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgather_mask_vv_nxv8f64_nxv8f64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgather.vv.mask.nxv8f64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+d,+zfh,+zvfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrgatherei16.vv.nxv1i8( , @@ -538,9 +538,97 @@ define @intrinsic_vrgatherei16_vv_nxv32i16_nxv32i16( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, 
a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv32i16( @@ -563,9 +651,97 @@ define @intrinsic_vrgatherei16_mask_vv_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv32i16( @@ -731,9 +907,97 @@ define @intrinsic_vrgatherei16_vv_nxv16i32_nxv16i32( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv16i32( @@ -756,9 +1020,51 @@ define @intrinsic_vrgatherei16_mask_vv_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv16i32( @@ -828,9 +1134,97 @@ define @intrinsic_vrgatherei16_vv_nxv8i64_nxv8i64( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; 
CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv8i64( @@ -853,9 +1247,29 @@ define @intrinsic_vrgatherei16_mask_vv_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl2re16.v v24, (a0) ; CHECK-NEXT: 
vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv8i64( @@ -1117,9 +1531,97 @@ define @intrinsic_vrgatherei16_vv_nxv32f16_nxv32f16( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv32f16( @@ -1142,9 +1644,97 @@ define @intrinsic_vrgatherei16_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv32f16( @@ -1310,9 +1900,97 @@ define @intrinsic_vrgatherei16_vv_nxv16f32_nxv16f32( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv16f32( @@ -1335,9 +2013,51 @@ define @intrinsic_vrgatherei16_mask_vv_nxv16f32_nxv16f32( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv16f32_nxv16f32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv16f32( @@ -1407,9 +2127,97 @@ define @intrinsic_vrgatherei16_vv_nxv8f64_nxv8f64( %0, %1, i32 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; 
CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv8f64( @@ -1432,9 +2240,29 @@ define @intrinsic_vrgatherei16_mask_vv_nxv8f64_nxv8f64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv8f64_nxv8f64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl2re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv8f64( diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zfh,+zvfh -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d,+zfh,+zvfh -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vrgatherei16.vv.nxv1i8( , @@ -538,9 +538,97 @@ define @intrinsic_vrgatherei16_vv_nxv32i16_nxv32i16( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv32i16( @@ -563,9 +651,97 @@ define @intrinsic_vrgatherei16_mask_vv_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv32i16( @@ -731,9 +907,97 @@ define @intrinsic_vrgatherei16_vv_nxv16i32_nxv16i32( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv16i32( @@ -756,9 +1020,51 @@ define @intrinsic_vrgatherei16_mask_vv_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv16i32( @@ -828,9 +1134,97 @@ define @intrinsic_vrgatherei16_vv_nxv8i64_nxv8i64( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; 
CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv8i64( @@ -853,9 +1247,29 @@ define @intrinsic_vrgatherei16_mask_vv_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl2re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv8i64( @@ -1117,9 +1531,97 @@ define 
@intrinsic_vrgatherei16_vv_nxv32f16_nxv32f16( %0, %1, i64 %2) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.nxv32f16( @@ -1142,9 +1644,97 @@ define 
@intrinsic_vrgatherei16_mask_vv_nxv32f16_nxv32f16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv32f16_nxv32f16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vrgatherei16.vv.mask.nxv32f16( @@ -1310,9 +1900,97 @@ define 
 ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv16f32_nxv16f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vrgatherei16.vv.nxv16f32(
@@ -1335,9 +2013,51 @@
 define <vscale x 16 x float> @intrinsic_vrgatherei16_mask_vv_nxv16f32_nxv16f32(<vscale x 16 x float> %0, <vscale x 16 x float> %1, <vscale x 16 x i16> %2, <vscale x 16 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv16f32_nxv16f32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl4re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x float> @llvm.riscv.vrgatherei16.vv.mask.nxv16f32(
@@ -1407,9 +2127,97 @@
 define <vscale x 8 x double> @intrinsic_vrgatherei16_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
 ; CHECK-LABEL: intrinsic_vrgatherei16_vv_nxv8f64_nxv8f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
 ; CHECK-NEXT: vrgatherei16.vv v24, v8, v16
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vrgatherei16.vv.nxv8f64(
@@ -1432,9 +2240,29 @@
 define <vscale x 8 x double> @intrinsic_vrgatherei16_mask_vv_nxv8f64_nxv8f64(<vscale x 8 x double> %0, <vscale x 8 x double> %1, <vscale x 8 x i16> %2, <vscale x 8 x i1> %3, i64 %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrgatherei16_mask_vv_nxv8f64_nxv8f64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl2re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vrgatherei16.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x double> @llvm.riscv.vrgatherei16.vv.mask.nxv8f64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrol-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64
 declare <vscale x 1 x i8> @llvm.fshl.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>)
@@ -291,6 +291,52 @@
 define <vscale x 64 x i8> @vrol_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b) {
 ; CHECK-LABEL: vrol_vv_nxv64i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vand.vi v24, v16, 7
 ; CHECK-NEXT: vsll.vv v24, v8, v24
@@ -298,6 +344,52 @@
 ; CHECK-NEXT: vand.vi v16, v16, 7
 ; CHECK-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vv_nxv64i8:
@@ -312,6 +402,52 @@
 define <vscale x 64 x i8> @vrol_vx_nxv64i8(<vscale x 64 x i8> %a, i8 %b) {
 ; CHECK-LABEL: vrol_vx_nxv64i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v16, a0
 ; CHECK-NEXT: vand.vi v24, v16, 7
@@ -320,6 +456,52 @@
 ; CHECK-NEXT: vand.vi v16, v16, 7
 ; CHECK-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vx_nxv64i8:
@@ -573,6 +753,52 @@
 define <vscale x 32 x i16> @vrol_vv_nxv32i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
 ; CHECK-LABEL: vrol_vv_nxv32i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vand.vi v24, v16, 15
 ; CHECK-NEXT: vsll.vv v24, v8, v24
@@ -580,6 +806,50 @@
 ; CHECK-NEXT: vand.vi v16, v16, 15
 ; CHECK-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vv_nxv32i16:
@@ -594,6 +864,52 @@
 define <vscale x 32 x i16> @vrol_vx_nxv32i16(<vscale x 32 x i16> %a, i16 %b) {
 ; CHECK-LABEL: vrol_vx_nxv32i16:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
 ; CHECK-NEXT: vmv.v.x v16, a0
 ; CHECK-NEXT: vand.vi v24, v16, 15
@@ -602,6 +918,50 @@
 ; CHECK-NEXT: vand.vi v16, v16, 15
 ; CHECK-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vx_nxv32i16:
@@ -860,6 +1220,52 @@
 define <vscale x 16 x i32> @vrol_vv_nxv16i32(<vscale x 16 x i32> %a, <vscale x 16 x i32> %b) {
 ; CHECK-LABEL: vrol_vv_nxv16i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a0, 31
 ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
 ; CHECK-NEXT: vand.vx v24, v16, a0
@@ -868,6 +1274,50 @@
 ; CHECK-NEXT: vand.vx v16, v16, a0
 ; CHECK-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vv_nxv16i32:
@@ -893,6 +1343,52 @@
 ;
 ; CHECK-RV64-LABEL: vrol_vx_nxv16i32:
 ; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: addi sp, sp, -16
+; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a1, a1, 3
+; CHECK-RV64-NEXT: sub sp, sp, a1
+; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a2, a1, 3
+; CHECK-RV64-NEXT: sub a1, a2, a1
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: li a2, 6
+; CHECK-RV64-NEXT: mul a1, a1, a2
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a2, a1, 2
+; CHECK-RV64-NEXT: add a1, a2, a1
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a1, a1, 2
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a2, a1, 1
+; CHECK-RV64-NEXT: add a1, a2, a1
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: slli a1, a1, 1
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: csrr a1, vlenb
+; CHECK-RV64-NEXT: add a1, sp, a1
+; CHECK-RV64-NEXT: addi a1, a1, 16
+; CHECK-RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-RV64-NEXT: addi a1, sp, 16
+; CHECK-RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
 ; CHECK-RV64-NEXT: vmv.v.x v16, a0
 ; CHECK-RV64-NEXT: li a0, 31
@@ -902,6 +1398,50 @@
 ; CHECK-RV64-NEXT: vand.vx v16, v16, a0
 ; CHECK-RV64-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-RV64-NEXT: vor.vv v8, v24, v8
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a1, a0, 3
+; CHECK-RV64-NEXT: sub a0, a1, a0
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: li a1, 6
+; CHECK-RV64-NEXT: mul a0, a0, a1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a1, a0, 2
+; CHECK-RV64-NEXT: add a0, a1, a0
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a0, a0, 2
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a1, a0, 1
+; CHECK-RV64-NEXT: add a0, a1, a0
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a0, a0, 1
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: add a0, sp, a0
+; CHECK-RV64-NEXT: addi a0, a0, 16
+; CHECK-RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: addi a0, sp, 16
+; CHECK-RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-RV64-NEXT: csrr a0, vlenb
+; CHECK-RV64-NEXT: slli a0, a0, 3
+; CHECK-RV64-NEXT: add sp, sp, a0
+; CHECK-RV64-NEXT: addi sp, sp, 16
 ; CHECK-RV64-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vx_nxv16i32:
@@ -1157,6 +1697,52 @@
 define <vscale x 8 x i64> @vrol_vv_nxv8i64(<vscale x 8 x i64> %a, <vscale x 8 x i64> %b) {
 ; CHECK-LABEL: vrol_vv_nxv8i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: li a0, 63
 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
 ; CHECK-NEXT: vand.vx v24, v16, a0
@@ -1165,6 +1751,50 @@
 ; CHECK-NEXT: vand.vx v16, v16, a0
 ; CHECK-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 ;
 ; CHECK-ZVBB-LABEL: vrol_vv_nxv8i64:
@@ -1181,6 +1811,50 @@
 ; CHECK-RV32: # %bb.0:
 ; CHECK-RV32-NEXT: addi sp, sp, -16
 ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: slli a2, a2, 3
+; CHECK-RV32-NEXT: sub sp, sp, a2
+; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: slli a3, a2, 3
+; CHECK-RV32-NEXT: sub a2, a3, a2
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: li a3, 6
+; CHECK-RV32-NEXT: mul a2, a2, a3
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: slli a3, a2, 2
+; CHECK-RV32-NEXT: add a2, a3, a2
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: slli a2, a2, 2
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: slli a3, a2, 1
+; CHECK-RV32-NEXT: add a2, a3, a2
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: slli a2, a2, 1
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: csrr a2, vlenb
+; CHECK-RV32-NEXT: add a2, sp, a2
+; CHECK-RV32-NEXT: addi a2, a2, 16
+; CHECK-RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-RV32-NEXT: addi a2, sp, 16
+; CHECK-RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-RV32-NEXT: sw a1, 12(sp)
 ; CHECK-RV32-NEXT: sw a0, 8(sp)
 ; CHECK-RV32-NEXT: addi a0, sp, 8
@@ -1193,6 +1867,49 @@
 ; CHECK-RV32-NEXT: vand.vx v16, v16, a0
 ; CHECK-RV32-NEXT: vsrl.vv v8, v8, v16
 ; CHECK-RV32-NEXT: vor.vv v8, v24, v8
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a1, a0, 3
+; CHECK-RV32-NEXT: sub a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: li a1, 6
+; CHECK-RV32-NEXT: mul a0, a0, a1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a1, a0, 2
+; CHECK-RV32-NEXT: add a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 2
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a1, a0, 1
+; CHECK-RV32-NEXT: add a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: addi a0, sp, 16
+; CHECK-RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 3
+; CHECK-RV32-NEXT: add sp, sp, a0
 ; CHECK-RV32-NEXT: addi sp, sp, 16
 ; CHECK-RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrol.ll b/llvm/test/CodeGen/RISCV/rvv/vrol.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrol.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrol.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
 declare <vscale x 64 x i8> @llvm.riscv.vrol.nxv1i8.nxv1i8(
@@ -319,9 +319,97 @@
 define <vscale x 64 x i8> @intrinsic_vrol_mask_vv_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrol_mask_vv_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vrol.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 64 x i8> @llvm.riscv.vrol.mask.nxv64i8.nxv64i8(
@@ -602,9 +690,97 @@
 define <vscale x 32 x i16> @intrinsic_vrol_mask_vv_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrol_mask_vv_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vrol.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 32 x i16> @llvm.riscv.vrol.mask.nxv32i16.nxv32i16(
@@ -838,9 +1014,97 @@
 define <vscale x 16 x i32> @intrinsic_vrol_mask_vv_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrol_mask_vv_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vrol.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vrol.mask.nxv16i32.nxv16i32(
@@ -1027,9 +1291,97 @@
 define <vscale x 8 x i64> @intrinsic_vrol_mask_vv_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vrol_mask_vv_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vrol.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vrol.mask.nxv8i64.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vror-sdnode.ll
@@ -1,8 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
-; RUN: llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32
-; RUN: llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-RV64
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB32
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK-ZVBB,CHECK-ZVBB64
 declare <vscale x 1 x i8> @llvm.fshr.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>)
 declare <vscale x 1 x i8> @llvm.fshl.nxv1i8(<vscale x 1 x i8>, <vscale x 1 x i8>, <vscale x 1 x i8>)
@@ -514,6 +514,52 @@
 define <vscale x 64 x i8> @vror_vv_nxv64i8(<vscale x 64 x i8> %a, <vscale x 64 x i8> %b) {
 ; CHECK-LABEL: vror_vv_nxv64i8:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: sub sp, sp, a0
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli a0, zero, e8, m8, ta, ma
 ; CHECK-NEXT: vand.vi v24, v16, 7
 ; CHECK-NEXT: vsrl.vv v24, v8, v24
@@ -521,6 +567,50 @@
 ; CHECK-NEXT: vand.vi v16, v16, 7
 ; CHECK-NEXT: vsll.vv v8, v8, v16
 ; CHECK-NEXT: vor.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_nxv64i8: @@ -535,6 +625,52 @@ define @vror_vx_nxv64i8( %a, i8 %b) { ; CHECK-LABEL: vror_vx_nxv64i8: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 ; 
CHECK-NEXT: vand.vi v24, v16, 7 @@ -543,6 +679,50 @@ ; CHECK-NEXT: vand.vi v16, v16, 7 ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vx_nxv64i8: @@ -1018,6 +1198,52 @@ define @vror_vv_nxv32i16( %a, %b) { ; CHECK-LABEL: vror_vv_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; 
CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; CHECK-NEXT: vand.vi v24, v16, 15 ; CHECK-NEXT: vsrl.vv v24, v8, v24 @@ -1025,6 +1251,50 @@ ; CHECK-NEXT: vand.vi v16, v16, 15 ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_nxv32i16: @@ -1039,6 +1309,52 @@ define @vror_vx_nxv32i16( %a, i16 %b) { ; CHECK-LABEL: vror_vx_nxv32i16: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a0 ; CHECK-NEXT: vand.vi v24, v16, 15 @@ -1047,6 +1363,50 @@ ; CHECK-NEXT: vand.vi v16, v16, 15 ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vx_nxv32i16: @@ -1490,6 +1850,52 @@ define @vror_vv_nxv16i32( %a, %b) { ; CHECK-LABEL: vror_vv_nxv16i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size 
Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 31 ; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-NEXT: vand.vx v24, v16, a0 @@ -1498,6 +1904,50 @@ ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_nxv16i32: @@ -1523,6 +1973,52 @@ ; ; CHECK-RV64-LABEL: vror_vx_nxv16i32: ; CHECK-RV64: # %bb.0: +; CHECK-RV64-NEXT: addi sp, sp, -16 +; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: slli a1, a1, 3 +; CHECK-RV64-NEXT: sub sp, sp, a1 +; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: slli a2, a1, 3 +; CHECK-RV64-NEXT: sub a1, a2, a1 +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: li a2, 6 +; CHECK-RV64-NEXT: mul a1, a1, a2 +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: slli a2, a1, 2 +; CHECK-RV64-NEXT: add a1, a2, a1 +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: slli a1, a1, 2 +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: 
addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: slli a2, a1, 1 +; CHECK-RV64-NEXT: add a1, a2, a1 +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: slli a1, a1, 1 +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a1, vlenb +; CHECK-RV64-NEXT: add a1, sp, a1 +; CHECK-RV64-NEXT: addi a1, a1, 16 +; CHECK-RV64-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: addi a1, sp, 16 +; CHECK-RV64-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-RV64-NEXT: vmv.v.x v16, a0 ; CHECK-RV64-NEXT: li a0, 31 @@ -1532,6 +2028,50 @@ ; CHECK-RV64-NEXT: vand.vx v16, v16, a0 ; CHECK-RV64-NEXT: vsll.vv v8, v8, v16 ; CHECK-RV64-NEXT: vor.vv v8, v24, v8 +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 3 +; CHECK-RV64-NEXT: sub a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: li a1, 6 +; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 2 +; CHECK-RV64-NEXT: add a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 2 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a1, a0, 1 +; CHECK-RV64-NEXT: add a0, a1, a0 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 1 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: add sp, sp, a0 +; CHECK-RV64-NEXT: addi sp, sp, 16 ; CHECK-RV64-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vx_nxv16i32: @@ -2025,6 +2565,52 @@ define @vror_vv_nxv8i64( %a, %b) { ; CHECK-LABEL: vror_vv_nxv8i64: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: sub sp, sp, a0 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 63 ; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vand.vx v24, v16, a0 @@ -2033,6 +2619,50 @@ ; CHECK-NEXT: vand.vx v16, v16, a0 ; CHECK-NEXT: vsll.vv v8, v8, v16 ; CHECK-NEXT: vor.vv v8, v24, v8 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vror_vv_nxv8i64: @@ -2049,6 +2679,50 @@ ; CHECK-RV32: # %bb.0: ; CHECK-RV32-NEXT: addi sp, sp, -16 ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a2, a2, 3 +; CHECK-RV32-NEXT: sub sp, sp, a2 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a3, a2, 3 +; CHECK-RV32-NEXT: sub a2, a3, a2 +; 
CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: li a3, 6 +; CHECK-RV32-NEXT: mul a2, a2, a3 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a3, a2, 2 +; CHECK-RV32-NEXT: add a2, a3, a2 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a2, a2, 2 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a3, a2, 1 +; CHECK-RV32-NEXT: add a2, a3, a2 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a2, a2, 1 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: addi a2, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: sw a1, 12(sp) ; CHECK-RV32-NEXT: sw a0, 8(sp) ; CHECK-RV32-NEXT: addi a0, sp, 8 @@ -2061,6 +2735,49 @@ ; CHECK-RV32-NEXT: vand.vx v16, v16, a0 ; CHECK-RV32-NEXT: vsll.vv v8, v8, v16 ; CHECK-RV32-NEXT: vor.vv v8, v24, v8 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: add sp, sp, a0 ; CHECK-RV32-NEXT: addi 
sp, sp, 16 ; CHECK-RV32-NEXT: ret ; @@ -2102,6 +2819,52 @@ define @vror_vi_nxv8i64( %a) { ; CHECK-RV32-LABEL: vror_vi_nxv8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: sub sp, sp, a0 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv.v.x v16, a0 @@ -2112,6 +2875,50 @@ ; CHECK-RV32-NEXT: vand.vx v24, v24, a0 ; CHECK-RV32-NEXT: vsll.vv v8, v8, v24 ; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; 
CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: add sp, sp, a0 +; CHECK-RV32-NEXT: addi sp, sp, 16 ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: vror_vi_nxv8i64: @@ -2135,6 +2942,52 @@ define @vror_vi_rotl_nxv8i64( %a) { ; CHECK-RV32-LABEL: vror_vi_rotl_nxv8i64: ; CHECK-RV32: # %bb.0: +; CHECK-RV32-NEXT: addi sp, sp, -16 +; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 3 +; CHECK-RV32-NEXT: sub sp, sp, a0 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 2 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v26, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 2 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v27, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 1 +; CHECK-RV32-NEXT: add a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v28, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a0, a0, 1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v29, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vs1r.v v30, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vs1r.v v31, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: li a0, 63 ; CHECK-RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv.v.x v16, a0 @@ -2145,6 +2998,50 @@ ; CHECK-RV32-NEXT: vand.vx v24, v24, a0 ; CHECK-RV32-NEXT: vsrl.vv v8, v8, v24 ; CHECK-RV32-NEXT: vor.vv v8, v16, v8 +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: slli a1, a0, 3 +; CHECK-RV32-NEXT: sub a0, a1, a0 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; CHECK-RV32-NEXT: li a1, 6 +; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: add a0, sp, a0 +; CHECK-RV32-NEXT: addi a0, a0, 16 +; CHECK-RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: csrr a0, vlenb +; 
+; CHECK-RV32-NEXT: slli a1, a0, 2
+; CHECK-RV32-NEXT: add a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 2
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a1, a0, 1
+; CHECK-RV32-NEXT: add a0, a1, a0
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 1
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: add a0, sp, a0
+; CHECK-RV32-NEXT: addi a0, a0, 16
+; CHECK-RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: addi a0, sp, 16
+; CHECK-RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-RV32-NEXT: csrr a0, vlenb
+; CHECK-RV32-NEXT: slli a0, a0, 3
+; CHECK-RV32-NEXT: add sp, sp, a0
+; CHECK-RV32-NEXT: addi sp, sp, 16
; CHECK-RV32-NEXT: ret
;
; CHECK-RV64-LABEL: vror_vi_rotl_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vror.ll b/llvm/test/CodeGen/RISCV/rvv/vror.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vror.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vror.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvbb \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvbb \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvbb \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvbb \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
declare <vscale x 1 x i8> @llvm.riscv.vror.nxv1i8.nxv1i8(
@@ -319,9 +319,97 @@ define <vscale x 64 x i8> @intrinsic_vror_mask_vv_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vror_mask_vv_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vror.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 64 x i8> @llvm.riscv.vror.mask.nxv64i8.nxv64i8(
@@ -602,9 +690,97 @@ define <vscale x 32 x i16> @intrinsic_vror_mask_vv_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vror_mask_vv_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vror.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 32 x i16> @llvm.riscv.vror.mask.nxv32i16.nxv32i16(
@@ -838,9 +1014,97 @@ define <vscale x 16 x i32> @intrinsic_vror_mask_vv_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vror_mask_vv_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re32.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vror.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 16 x i32> @llvm.riscv.vror.mask.nxv16i32.nxv16i32(
@@ -1027,9 +1291,97 @@ define <vscale x 8 x i64> @intrinsic_vror_mask_vv_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
; CHECK-LABEL: intrinsic_vror_mask_vv_nxv8i64_nxv8i64:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
; CHECK-NEXT: vror.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 8 x i64> @llvm.riscv.vror.mask.nxv8i64.nxv8i64(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vrsub.ll b/llvm/test/CodeGen/RISCV/rvv/vrsub.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vrsub.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vrsub.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
declare <vscale x 1 x i8> @llvm.riscv.vrsub.nxv1i8.i8(
  <vscale x 1 x i8>,
@@ -1086,12 +1086,98 @@
; RV32-LABEL: intrinsic_vrsub_mask_vx_nxv8i64_nxv8i64_i64:
; RV32: # %bb.0: # %entry
; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
; RV32-NEXT: sw a1, 12(sp)
; RV32-NEXT: sw a0, 8(sp)
; RV32-NEXT: addi a0, sp, 8
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
; RV32-NEXT: vlse64.v v24, (a0), zero
; RV32-NEXT: vsub.vv v8, v24, v16, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv32.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs \
; RUN: < %s | FileCheck %s
declare <vscale x 1 x i8> @llvm.riscv.vsadd.nxv1i8.nxv1i8(
  <vscale x 1 x i8>,
@@ -316,9 +316,97 @@ define <vscale x 64 x i8> @intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8(<vscale x 64 x i8> %0, <vscale x 64 x i8> %1, <vscale x 64 x i8> %2, <vscale x 64 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8r.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 64 x i8> @llvm.riscv.vsadd.mask.nxv64i8.nxv64i8(
@@ -599,9 +687,97 @@ define <vscale x 32 x i16> @intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16(<vscale x 32 x i16> %0, <vscale x 32 x i16> %1, <vscale x 32 x i16> %2, <vscale x 32 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re16.v v24, (a0)
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
  %a = call <vscale x 32 x i16> @llvm.riscv.vsadd.mask.nxv32i16.nxv32i16(
@@ -835,9 +1011,97 @@ define <vscale x 16 x i32> @intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, <vscale x 16 x i1> %3, i32 %4) nounwind {
; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) #
Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size 
Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv8i64.nxv8i64( @@ -2102,12 +2454,98 @@ ; CHECK-LABEL: intrinsic_vsadd_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: sub a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 6 +; CHECK-NEXT: mul a3, a3, a4 +; 
CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: sw a0, 8(sp) ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsadd-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsadd.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { ; 
CHECK-LABEL: intrinsic_vsadd_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vsadd_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vsadd_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: 
intrinsic_vsadd_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsadd.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv8i64.nxv8i64( @@ -2102,12 +2454,98 @@ ; CHECK-LABEL: intrinsic_vsaddu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: sub a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 6 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: sw a0, 8(sp) ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi 
a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsaddu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vsaddu.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli 
a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, 
a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 
1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsaddu.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsaddu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsaddu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; 
CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vsaddu.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -395,9 +395,57 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 23 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 22 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 21 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 20 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb @@ -425,7 +473,55 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +;
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -437,6 +533,58 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16 x i1> %ma, <vscale x 16 x i1> %mb, <vscale x 16 x double>* %out) { ; CHECK-LABEL: vselect_legalize_regression: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x0a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 10 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v
v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a2, zero, e8, m2, ta, ma ; CHECK-NEXT: vlm.v v24, (a0) ; CHECK-NEXT: vmand.mm v1, v0, v24 @@ -453,6 +601,56 @@ ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: vs8r.v v16, (a0) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %cond = and <vscale x 16 x i1> %ma, %mb %sel = select <vscale x 16 x i1> %cond, <vscale x 16 x double> %a, <vscale x 16 x double> zeroinitializer diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -350,9 +350,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 30 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 29 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 28 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3,
27 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -391,7 +480,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; 
CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -407,9 +585,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 30 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 29 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 28 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 27 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 26 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 25 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 23 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 22 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 21 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 20 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 19 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 18 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -448,7 +715,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li 
a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret @@ -694,9 +1050,98 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 30 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 29 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 28 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v3, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 27 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, 
a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 @@ -734,7 +1179,96 @@ ; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 30 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 29 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 28 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 27 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 5 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -329,8 +329,95 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; 
CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli s0, a0, e64, m1, ta, ma ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: # %if.then @@ -354,7 +441,94 @@ ; CHECK-NEXT: vfsub.vv v8, v9, v8 ; CHECK-NEXT: .LBB6_3: # %if.then ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v 
v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -388,8 +562,95 @@ ; CHECK-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; CHECK-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a2, a2, a3 ; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v1, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 4 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v2, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 14 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v3, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 13 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 12 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 11 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 10 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr 
a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a2, a3 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, a2, sp +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli s0, a0, e64, m1, ta, ma ; CHECK-NEXT: beqz a1, .LBB7_2 ; CHECK-NEXT: # %bb.1: # %if.then @@ -414,7 +675,94 @@ ; CHECK-NEXT: vsetvli zero, s0, e64, m1, ta, ma ; CHECK-NEXT: vfmul.vv v8, v9, v8 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 14 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v3, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 13 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 12 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 11 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: 
vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, a0, sp +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsext-vp.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v < %s | FileCheck %s -; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s +; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s declare <vscale x 2 x i16> @llvm.vp.sext.nxv2i16.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, i32) @@ -167,10 +167,100 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB12_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT:
addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsext.vf4 v24, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i1> %m, i32 %vl) ret <vscale x 32 x i32> %v @@ -191,9 +281,99 @@ ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB13_2: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1,
vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vsext.vf4 v24, v8 ; CHECK-NEXT: vmv.v.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call <vscale x 32 x i32> @llvm.vp.sext.nxv32i32.nxv32i8(<vscale x 32 x i8> %a, <vscale x 32 x i1> shufflevector (<vscale x 32 x i1> insertelement (<vscale x 32 x i1> undef, i1 true, i32 0), <vscale x 32 x i1> undef, <vscale x 32 x i32> zeroinitializer), i32 %vl) ret <vscale x 32 x i32> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vsha2ch.ll b/llvm/test/CodeGen/RISCV/rvv/vsha2ch.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsha2ch.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsha2ch.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvknhb \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvknhb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvknhb \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvknhb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK declare <vscale x 4 x i32> @llvm.riscv.vsha2ch.nxv4i32.nxv4i32( @@ -62,9 +62,97 @@ define <vscale x 16 x i32> @intrinsic_vsha2ch_vv_nxv16i32_nxv16i32(<vscale x 16 x i32> %0, <vscale x 16 x i32> %1, <vscale x 16 x i32> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsha2ch_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size
Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vsha2ch.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call <vscale x 16 x i32> @llvm.riscv.vsha2ch.nxv16i32.nxv16i32( @@ -111,9 +199,97 @@ define <vscale x 8 x i64> @intrinsic_vsha2ch_vv_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsha2ch_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +;
CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vsha2ch.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call <vscale x 8 x i64> @llvm.riscv.vsha2ch.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll b/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsha2cl.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvknhb \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvknhb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64
-mattr=+v,+experimental-zvknhb \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvknhb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK declare @llvm.riscv.vsha2cl.nxv4i32.nxv4i32( @@ -62,9 +62,97 @@ define @intrinsic_vsha2cl_vv_nxv16i32_nxv16i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vsha2ch.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsha2cl.nxv16i32.nxv16i32( @@ -111,9 +199,97 @@ define @intrinsic_vsha2cl_vv_nxv8i64_nxv8i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsha2cl_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vsha2ch.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsha2cl.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsha2ms.ll b/llvm/test/CodeGen/RISCV/rvv/vsha2ms.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsha2ms.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsha2ms.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvknha,+experimental-zvknhb \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+experimental-zvknha,+experimental-zvknhb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvknha,+experimental-zvknhb \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+experimental-zvknha,+experimental-zvknhb \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK declare @llvm.riscv.vsha2ms.nxv4i32.nxv4i32( @@ -62,9 +62,97 @@ define @intrinsic_vsha2ms_vv_nxv16i32_nxv16i32( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsha2ms_vv_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma ; CHECK-NEXT: vsha2ms.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: 
csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsha2ms.nxv16i32.nxv16i32( @@ -111,9 +199,97 @@ define @intrinsic_vsha2ms_vv_nxv8i64_nxv8i64( %0, %1, %2, iXLen %3) nounwind { ; CHECK-LABEL: intrinsic_vsha2ms_vv_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vsha2ms.vv v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 
+; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsha2ms.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsitofp-vp.ll @@ -306,6 +306,52 @@ define @vsitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_nxv32f16_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -326,6 +372,50 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfncvt.f.x.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: 
add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.sitofp.nxv32f16.nxv32i32( %va, %m, i32 %evl) ret %v @@ -336,6 +426,14 @@ define @vsitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -355,6 +453,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.sitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vslide1up-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v,+f -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vslide1up.nxv1i8.i8( , @@ -1066,6 +1066,50 @@ define @intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64( %0, %1, i64 %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vslide1up_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: sub a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 6 +; CHECK-NEXT: mul a3, a3, a4 +; 
CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a3, a2, e64, m8, ta, ma ; CHECK-NEXT: slli a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma @@ -1073,6 +1117,50 @@ ; CHECK-NEXT: vslide1up.vx v16, v24, a0 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vslide1up.mask.nxv8i64.i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsll.ll b/llvm/test/CodeGen/RISCV/rvv/vsll.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsll.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsll.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc 
-mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vsll.nxv1i8.nxv1i8( , @@ -318,9 +318,97 @@ define @intrinsic_vsll_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsll_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsll.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsll.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ define @intrinsic_vsll_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsll_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vsll.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsll.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define @intrinsic_vsll_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsll_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsll.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsll.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define @intrinsic_vsll_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsll_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsll.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsll.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv32.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s -; RUN: not --crash llc -mtriple=riscv32 -mattr=+zve64d 2>&1 \ +; RUN: not --crash llc -mtriple=riscv32 -mattr=+m,+zve64d 2>&1 \ ; RUN: < %s | FileCheck %s --check-prefixes=ZVE64D ; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vsmul @@ -327,10 +327,98 @@ define @intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; 
CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv64i8.nxv64i8( @@ -616,10 +704,98 @@ define @intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv32i16.nxv32i16( @@ -857,10 +1033,98 @@ define @intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv16i32.nxv16i32( @@ -1050,10 +1314,98 @@ define @intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: 
addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64( @@ -2145,6 +2497,49 @@ ; CHECK-LABEL: intrinsic_vsmul_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: sub a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 6 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: sw a0, 8(sp) ; CHECK-NEXT: addi a0, sp, 8 @@ -2152,6 +2547,49 @@ ; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsmul-rv64.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s -; RUN: not --crash llc -mtriple=riscv64 -mattr=+zve64d 2>&1 \ +; RUN: not --crash llc -mtriple=riscv64 -mattr=+m,+zve64d 2>&1 \ ; RUN: < %s | FileCheck %s --check-prefixes=ZVE64D ; ZVE64D: LLVM ERROR: Cannot select: intrinsic %llvm.riscv.vsmul @@ -327,10 +327,98 @@ define @intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: 
add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv64i8.nxv64i8( @@ -616,10 +704,98 @@ define @intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, 
a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv32i16.nxv32i16( @@ -857,10 +1033,98 @@ define @intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; 
CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv16i32.nxv16i32( @@ -1050,10 +1314,98 @@ define @intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vsmul_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: csrwi vxrm, 0 ; CHECK-NEXT: vsmul.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: 
slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsmul.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsra.ll b/llvm/test/CodeGen/RISCV/rvv/vsra.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsra.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsra.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vsra.nxv1i8.nxv1i8( , @@ -318,9 +318,97 @@ define @intrinsic_vsra_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsra_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsra.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsra.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ define @intrinsic_vsra_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsra_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: 
vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vsra.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsra.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define @intrinsic_vsra_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsra_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v 
v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsra.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsra.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define @intrinsic_vsra_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsra_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # 
Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsra.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsra.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsrl.ll b/llvm/test/CodeGen/RISCV/rvv/vsrl.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsrl.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vsrl.nxv1i8.nxv1i8( , @@ -318,9 +318,97 @@ define @intrinsic_vsrl_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsrl_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi 
a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vsrl.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsrl.mask.nxv64i8.nxv64i8( @@ -601,9 +689,97 @@ define @intrinsic_vsrl_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsrl_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vsrl.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsrl.mask.nxv32i16.nxv32i16( @@ -837,9 +1013,97 @@ define @intrinsic_vsrl_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsrl_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsrl.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsrl.mask.nxv16i32.nxv16i32( @@ -1026,9 +1290,97 @@ define @intrinsic_vsrl_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsrl_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: 
vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vsrl.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsrl.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssub.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, 
sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: 
csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv8i64.nxv8i64( @@ -2102,12 +2454,98 @@ ; CHECK-LABEL: intrinsic_vssub_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: csrr a3, vlenb +; 
CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: sub a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 6 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: sw a0, 8(sp) ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssub-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v 
-verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssub.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, 
a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; 
CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssub_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vssub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: 
add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssub.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv32.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv32 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssubu.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vssubu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vssubu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb 
+; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vssubu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vssubu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i32 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv8i64.nxv8i64( @@ -2102,12 +2454,98 @@ ; CHECK-LABEL: intrinsic_vssubu_mask_vx_nxv8i64_nxv8i64_i64: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: sub sp, sp, a3 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 3 +; CHECK-NEXT: sub a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a4, 6 +; CHECK-NEXT: mul a3, a3, a4 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 2 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a4, a3, 1 +; CHECK-NEXT: add a3, a4, a3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill ; CHECK-NEXT: sw a1, 12(sp) ; CHECK-NEXT: sw a0, 8(sp) ; CHECK-NEXT: addi a0, sp, 8 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, mu ; CHECK-NEXT: vlse64.v v24, (a0), zero ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli 
a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vssubu-rv64.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs \ +; RUN: llc -mtriple=riscv64 -mattr=+m,+v -verify-machineinstrs \ ; RUN: < %s | FileCheck %s declare @llvm.riscv.vssubu.nxv1i8.nxv1i8( , @@ -316,9 +316,97 @@ define @intrinsic_vssubu_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv64i8.nxv64i8( @@ -599,9 +687,97 @@ define @intrinsic_vssubu_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv32i16.nxv32i16( @@ -835,9 +1011,97 @@ define @intrinsic_vssubu_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, 
vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv16i32.nxv16i32( @@ -1024,9 +1288,97 @@ define @intrinsic_vssubu_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, i64 %4) nounwind { ; CHECK-LABEL: intrinsic_vssubu_mask_vv_nxv8i64_nxv8i64_nxv8i64: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu ; CHECK-NEXT: vssubu.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; 
CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vssubu.mask.nxv8i64.nxv8i64( diff --git a/llvm/test/CodeGen/RISCV/rvv/vsub.ll b/llvm/test/CodeGen/RISCV/rvv/vsub.ll --- a/llvm/test/CodeGen/RISCV/rvv/vsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsub.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32 -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64 declare @llvm.riscv.vsub.nxv1i8.nxv1i8( , @@ -311,9 +311,97 @@ define @intrinsic_vsub_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsub_mask_vv_nxv64i8_nxv64i8_nxv64i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu ; 
CHECK-NEXT: vsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsub.mask.nxv64i8.nxv64i8( @@ -588,9 +676,97 @@ define @intrinsic_vsub_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsub_mask_vv_nxv32i16_nxv32i16_nxv32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu ; CHECK-NEXT: 
vsub.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vsub.mask.nxv32i16.nxv32i16( @@ -819,9 +995,97 @@ define @intrinsic_vsub_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vsub_mask_vv_nxv16i32_nxv16i32_nxv16i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu ; CHECK-NEXT: vsub.vv 
v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 16 x i32> @llvm.riscv.vsub.mask.nxv16i32.nxv16i32(
@@ -1004,9 +1268,97 @@
 define <vscale x 8 x i64> @intrinsic_vsub_mask_vv_nxv8i64_nxv8i64_nxv8i64(<vscale x 8 x i64> %0, <vscale x 8 x i64> %1, <vscale x 8 x i64> %2, <vscale x 8 x i1> %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vsub_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vsub.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
   %a = call <vscale x 8 x i64> @llvm.riscv.vsub.mask.nxv8i64.nxv8i64(
@@ -2102,12 +2454,98 @@
 ; RV32-LABEL: intrinsic_vsub_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vsub.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
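A note on the .cfi_escape strings that recur in the vector-spill checks below (and throughout this patch): each one is a DW_CFA_def_cfa_expression encoding "CFA = sp + 16 + N * vlenb", and the single SLEB128 byte N (0x06, 0x08, 0x1a, ...) is the only byte that varies from test to test, which is why the changed multipliers show up as one-byte edits in the expected strings. The sketch below shows how such an escape string appears to be assembled; it is an inference from the byte patterns in these checks, not code from the patch. The helper name is hypothetical, the opcode values are standard DWARF, and reading 0xa2, 0x38 as ULEB128 for register 7202 (4096 + CSR 0xc22, i.e. vlenb) is an assumption worth verifying against the backend.

#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: build the DW_CFA_def_cfa_expression bytes for
// "CFA = x2 (sp) + slot + n * vlenb". Assumes slot and n each fit in a
// single SLEB128 byte (0..63), which holds for every multiplier in
// these tests; larger values would also change the 0x0d length byte.
std::vector<uint8_t> cfaSpPlusNVlenb(uint8_t slot, uint8_t n) {
  return {
      0x0f,                   // DW_CFA_def_cfa_expression
      0x0d,                   // expression length: 13 bytes
      0x72, 0x00,             // DW_OP_breg2 (x2/sp) + 0
      0x11, slot,             // DW_OP_consts <slot>  (16 in these tests)
      0x22,                   // DW_OP_plus
      0x11, n,                // DW_OP_consts <n>     (the vlenb multiplier)
      0x92, 0xa2, 0x38, 0x00, // DW_OP_bregx 7202 (vlenb, assumed) + 0
      0x1e,                   // DW_OP_mul
      0x22,                   // DW_OP_plus
  };
}

int main() {
  // Reproduces the escape string checked in vtrunc_nxv15i16_nxv15i64
  // below: sp + 16 + 6 * vlenb.
  for (uint8_t b : cfaSpPlusNVlenb(16, 6))
    std::printf("0x%02x, ", b);
  std::printf("\n");
  return 0;
}

Under the same assumptions, the 18 * vlenb and 26 * vlenb frames elsewhere in this patch would correspond to cfaSpPlusNVlenb(16, 18) and cfaSpPlusNVlenb(16, 26); every other byte of the expression stays fixed.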
diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll
@@ -157,6 +157,35 @@
 define <vscale x 15 x i16> @vtrunc_nxv15i16_nxv15i64(<vscale x 15 x i64> %a, <vscale x 15 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_nxv15i16_nxv15i64:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 6 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 3
@@ -180,6 +209,33 @@
 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT: vnsrl.wi v16, v20, 0, v0.t
 ; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 15 x i16> @llvm.vp.trunc.nxv15i16.nxv15i64(<vscale x 15 x i64> %a, <vscale x 15 x i1> %m, i32 %vl)
   ret <vscale x 15 x i16> %v
@@ -214,6 +270,35 @@
 define <vscale x 32 x i7> @vtrunc_nxv32i7_nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_nxv32i7_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 6 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vmv1r.v v24, v0
 ; CHECK-NEXT: csrr a1, vlenb
 ; CHECK-NEXT: srli a2, a1, 2
@@ -238,6 +323,33 @@
 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma
 ; CHECK-NEXT: vnsrl.wi v16, v20, 0, v0.t
 ; CHECK-NEXT: vmv4r.v v8, v16
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
   %v = call <vscale x 32 x i7> @llvm.vp.trunc.nxv32i7.nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 %vl)
   ret <vscale x 32 x i7> %v
@@ -248,6 +360,35 @@
 define <vscale x 32 x i8> @vtrunc_nxv32i8_nxv32i32(<vscale x 32 x i32> %a, <vscale x 32 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vtrunc_nxv32i8_nxv32i32:
 ; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: sub sp, sp, a1
+;
CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 6 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -272,6 +413,33 @@ ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v20, 0, v0.t ; CHECK-NEXT: vmv4r.v v8, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.trunc.nxv32i8.nxv32i32( %a, %m, i32 %vl) ret %v @@ -285,9 +453,69 @@ ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: li a3, 26 +; CHECK-NEXT: mul a1, a1, a3 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x1a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 26 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 25 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v1, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 24 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v2, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 23 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 22 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded 
Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 21 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 20 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 19 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a3, 18 +; CHECK-NEXT: mul a1, a1, a3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a3, a1, 4 +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v1, v0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -354,7 +582,67 @@ ; CHECK-NEXT: vnsrl.wi v24, v8, 0, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 25 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v1, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 24 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v2, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 23 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 22 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 21 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 20 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 18 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 26 +; CHECK-NEXT: mul a0, 
a0, a1 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll --- a/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vuitofp-vp.ll @@ -306,6 +306,52 @@ define @vuitofp_nxv32f16_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_nxv32f16_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -326,6 +372,50 @@ ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfncvt.f.xu.w v24, v8, v0.t ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.uitofp.nxv32f16.nxv32i32( %va, %m, i32 %evl) ret %v @@ -336,6 +426,14 @@ define @vuitofp_nxv32f32_nxv32i32( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_nxv32f32_nxv32i32: ; CHECK: # %bb.0: +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: srli a2, a1, 2 @@ -355,6 +453,12 @@ ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret %v = call @llvm.vp.uitofp.nxv32f32.nxv32i32( %va, %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w.ll @@ -1,11 +1,11 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs -early-live-intervals | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs -early-live-intervals | FileCheck %s declare @llvm.riscv.vwadd.w.nxv1i16.nxv1i8( , @@ -275,9 +275,51 @@ define @intrinsic_vwadd.w_mask_wv_nxv32i16_nxv32i16_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwadd.w_mask_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: 
vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vwadd.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.mask.nxv32i16.nxv32i8( @@ -511,9 +553,51 @@ define @intrinsic_vwadd.w_mask_wv_nxv16i32_nxv16i32_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwadd.w_mask_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vwadd.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.mask.nxv16i32.nxv16i16( @@ -700,9 +784,51 @@ define @intrinsic_vwadd.w_mask_wv_nxv8i64_nxv8i64_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwadd.w_mask_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: 
add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vwadd.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.mask.nxv8i64.nxv8i32( @@ -2018,9 +2144,97 @@ define @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwadd.wv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; 
CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv32i16.nxv32i8( @@ -2154,9 +2368,97 @@ define @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vwadd.wv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, 
(a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwadd.w.nxv8i64.nxv8i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8( , @@ -271,9 +271,51 @@ define @intrinsic_vwaddu.w_mask_wv_nxv32i16_nxv32i16_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwaddu.w_mask_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vwaddu.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) 
# Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.mask.nxv32i16.nxv32i8( @@ -507,9 +549,51 @@ define @intrinsic_vwaddu.w_mask_wv_nxv16i32_nxv16i32_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwaddu.w_mask_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vwaddu.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.mask.nxv16i32.nxv16i16( @@ -696,9 +780,51 @@ define @intrinsic_vwaddu.w_mask_wv_nxv8i64_nxv8i64_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwaddu.w_mask_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vwaddu.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size 
Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.mask.nxv8i64.nxv8i32( @@ -2014,9 +2140,97 @@ define @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwaddu.wv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload 
+; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8( @@ -2150,9 +2364,97 @@ define @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; CHECK-NEXT: vwaddu.wv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr 
a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32( diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll --- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \ +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s -; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \ +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \ ; RUN: -verify-machineinstrs | FileCheck %s declare @llvm.riscv.vwsub.w.nxv1i16.nxv1i8( , @@ -271,9 +271,51 @@ define @intrinsic_vwsub.w_mask_wv_nxv32i16_nxv32i16_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwsub.w_mask_wv_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vwsub.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.mask.nxv32i16.nxv32i8( @@ -507,9 +549,51 @@ define @intrinsic_vwsub.w_mask_wv_nxv16i32_nxv16i32_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwsub.w_mask_wv_nxv16i32_nxv16i32_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; 
CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu ; CHECK-NEXT: vwsub.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.mask.nxv16i32.nxv16i16( @@ -696,9 +780,51 @@ define @intrinsic_vwsub.w_mask_wv_nxv8i64_nxv8i64_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind { ; CHECK-LABEL: intrinsic_vwsub.w_mask_wv_nxv8i64_nxv8i64_nxv8i32: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl4re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu ; CHECK-NEXT: vwsub.wv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.mask.nxv8i64.nxv8i32( @@ -2014,9 +2140,97 @@ define 
@intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, iXLen %2) nounwind { ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 3 +; CHECK-NEXT: sub a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: li a2, 6 +; CHECK-NEXT: mul a1, a1, a2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 2 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; CHECK-NEXT: vwsub.wv v24, v16, v8 ; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vwsub.w.nxv32i16.nxv32i8( @@ -2150,9 +2364,97 @@ define 
@@ -2150,9 +2364,97 @@ define @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vwsub.wv v24, v16, v8
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s
 declare @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
 ,
@@ -271,9 +271,51 @@ define @intrinsic_vwsubu.w_mask_wv_nxv32i16_nxv32i16_nxv32i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vwsubu.w_mask_wv_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl4r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
 ; CHECK-NEXT: vwsubu.wv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vwsubu.w.mask.nxv32i16.nxv32i8(
@@ -507,9 +549,51 @@ define @intrinsic_vwsubu.w_mask_wv_nxv16i32_nxv16i32_nxv16i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vwsubu.w_mask_wv_nxv16i32_nxv16i32_nxv16i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl4re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
 ; CHECK-NEXT: vwsubu.wv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vwsubu.w.mask.nxv16i32.nxv16i16(
@@ -696,9 +780,51 @@ define @intrinsic_vwsubu.w_mask_wv_nxv8i64_nxv8i64_nxv8i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vwsubu.w_mask_wv_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl4re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, mu
 ; CHECK-NEXT: vwsubu.wv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vwsubu.w.mask.nxv8i64.nxv8i32(
@@ -2014,9 +2140,97 @@ define @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8( %0, %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e8, m4, ta, ma
 ; CHECK-NEXT: vwsubu.wv v24, v16, v8
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
@@ -2150,9 +2364,97 @@ define @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32( %0, %1, iXLen %2) nounwind {
 ; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma
 ; CHECK-NEXT: vwsubu.wv v24, v16, v8
 ; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxor.ll b/llvm/test/CodeGen/RISCV/rvv/vxor.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vxor.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxor.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v \
 ; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,RV64
 declare @llvm.riscv.vxor.nxv1i8.nxv1i8(
 ,
@@ -318,9 +318,97 @@ define @intrinsic_vxor_mask_vv_nxv64i8_nxv64i8_nxv64i8( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv64i8_nxv64i8_nxv64i8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
 ; CHECK-NEXT: vxor.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vxor.mask.nxv64i8.nxv64i8(
@@ -601,9 +689,97 @@ define @intrinsic_vxor_mask_vv_nxv32i16_nxv32i16_nxv32i16( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv32i16_nxv32i16_nxv32i16:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
 ; CHECK-NEXT: vxor.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vxor.mask.nxv32i16.nxv32i16(
@@ -837,9 +1013,97 @@ define @intrinsic_vxor_mask_vv_nxv16i32_nxv16i32_nxv16i32( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv16i32_nxv16i32_nxv16i32:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
 ; CHECK-NEXT: vxor.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vxor.mask.nxv16i32.nxv16i32(
@@ -1026,9 +1290,97 @@ define @intrinsic_vxor_mask_vv_nxv8i64_nxv8i64_nxv8i64( %0, %1, %2, %3, iXLen %4) nounwind {
 ; CHECK-LABEL: intrinsic_vxor_mask_vv_nxv8i64_nxv8i64_nxv8i64:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, mu
 ; CHECK-NEXT: vxor.vv v8, v16, v24, v0.t
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 %a = call @llvm.riscv.vxor.mask.nxv8i64.nxv8i64(
@@ -2146,12 +2498,98 @@
 ; RV32-LABEL: intrinsic_vxor_mask_vx_nxv8i64_nxv8i64_i64:
 ; RV32: # %bb.0: # %entry
 ; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: sub sp, sp, a3
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 3
+; RV32-NEXT: sub a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: li a4, 6
+; RV32-NEXT: mul a3, a3, a4
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v25, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 2
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v26, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 2
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v27, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a4, a3, 1
+; RV32-NEXT: add a3, a4, a3
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v28, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: slli a3, a3, 1
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v29, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: csrr a3, vlenb
+; RV32-NEXT: add a3, sp, a3
+; RV32-NEXT: addi a3, a3, 16
+; RV32-NEXT: vs1r.v v30, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs1r.v v31, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT: sw a1, 12(sp)
 ; RV32-NEXT: sw a0, 8(sp)
 ; RV32-NEXT: addi a0, sp, 8
 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, mu
 ; RV32-NEXT: vlse64.v v24, (a0), zero
 ; RV32-NEXT: vxor.vv v8, v16, v24, v0.t
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 3
+; RV32-NEXT: sub a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: li a1, 6
+; RV32-NEXT: mul a0, a0, a1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 2
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a1, a0, 1
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: add a0, sp, a0
+; RV32-NEXT: addi a0, a0, 16
+; RV32-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: add sp, sp, a0
 ; RV32-NEXT: addi sp, sp, 16
 ; RV32-NEXT: ret
 ;
diff --git a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
--- a/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vzext-vp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+v < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v < %s | FileCheck %s
 declare @llvm.vp.zext.nxv2i16.nxv2i8(, , i32)
@@ -167,10 +167,100 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: .LBB12_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vmv1r.v v0, v12
 ; CHECK-NEXT: vzext.vf4 v24, v8, v0.t
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.zext.nxv32i32.nxv32i8( %a, %m, i32 %vl)
 ret %v
@@ -191,9 +281,99 @@
 ; CHECK-NEXT: # %bb.1:
 ; CHECK-NEXT: mv a0, a1
 ; CHECK-NEXT: .LBB13_2:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 3
+; CHECK-NEXT: sub a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v24, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: li a2, 6
+; CHECK-NEXT: mul a1, a1, a2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v25, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 2
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v26, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 2
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v27, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a2, a1, 1
+; CHECK-NEXT: add a1, a2, a1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v28, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 1
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v29, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs1r.v v30, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a1) # Unknown-size Folded Spill
 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma
 ; CHECK-NEXT: vzext.vf4 v24, v8
 ; CHECK-NEXT: vmv.v.v v8, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 %v = call @llvm.vp.zext.nxv32i32.nxv32i8( %a, shufflevector ( insertelement ( undef, i1 true, i32 0), undef, zeroinitializer), i32 %vl)
 ret %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/xsfvcp-xvv.ll b/llvm/test/CodeGen/RISCV/rvv/xsfvcp-xvv.ll
--- a/llvm/test/CodeGen/RISCV/rvv/xsfvcp-xvv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/xsfvcp-xvv.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zfh,+xsfvcp \
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+m,+v,+zfh,+xsfvcp \
 ; RUN: -verify-machineinstrs | FileCheck %s
-; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zfh,+xsfvcp \
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+m,+v,+zfh,+xsfvcp \
 ; RUN: -verify-machineinstrs | FileCheck %s
 define void @test_sf_vc_vvv_se_e8mf8( %vd, %vs2, %vs1, iXLen %vl) {
@@ -85,9 +85,99 @@ define void @test_sf_vc_vvv_se_e8m8( %vd, %vs2, %vs1, iXLen %vl) {
 ; CHECK-LABEL: test_sf_vc_vvv_se_e8m8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8r.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma
 ; CHECK-NEXT: sf.vc.vvv 3, v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 tail call void @llvm.riscv.sf.vc.vvv.se.iXLen.nxv64i8.nxv64i8.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl)
@@ -164,9 +254,99 @@ define void @test_sf_vc_vvv_se_e16m8( %vd, %vs2, %vs1, iXLen %vl) {
 ; CHECK-LABEL: test_sf_vc_vvv_se_e16m8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re16.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma
 ; CHECK-NEXT: sf.vc.vvv 3, v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 tail call void @llvm.riscv.sf.vc.vvv.se.iXLen.nxv32i16.nxv32i16.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl)
@@ -230,9 +410,99 @@ define void @test_sf_vc_vvv_se_e32m8( %vd, %vs2, %vs1, iXLen %vl) {
 ; CHECK-LABEL: test_sf_vc_vvv_se_e32m8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re32.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma
 ; CHECK-NEXT: sf.vc.vvv 3, v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 tail call void @llvm.riscv.sf.vc.vvv.se.iXLen.nxv16i32.nxv16i32.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl)
@@ -283,9 +553,99 @@ define void @test_sf_vc_vvv_se_e64m8( %vd, %vs2, %vs1, iXLen %vl) {
 ; CHECK-LABEL: test_sf_vc_vvv_se_e64m8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 1
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: addi a2, sp, 16
+; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill
 ; CHECK-NEXT: vl8re64.v v24, (a0)
 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma
 ; CHECK-NEXT: sf.vc.vvv 3, v8, v16, v24
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 3
+; CHECK-NEXT: sub a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 2
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 2
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a1, a0, 1
+; CHECK-NEXT: add a0, a1, a0
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 1
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
 ; CHECK-NEXT: ret
 entry:
 tail call void @llvm.riscv.sf.vc.vvv.se.iXLen.nxv8i64.nxv8i64.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl)
@@ -375,9 +735,99 @@ define @test_sf_vc_v_vvv_se_e8m8( %vd, %vs2, %vs1, iXLen %vl) {
 ; CHECK-LABEL: test_sf_vc_v_vvv_se_e8m8:
 ; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 3
+; CHECK-NEXT: sub sp, sp, a2
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 3
+; CHECK-NEXT: sub a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: li a3, 6
+; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 2
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a2, a2, 2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
+; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill
+; CHECK-NEXT: csrr a2, vlenb
+; CHECK-NEXT: slli a3, a2, 1
+; CHECK-NEXT: add a2, a3, a2
+; CHECK-NEXT: add a2, sp, a2
+; CHECK-NEXT: addi a2, a2, 16
Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.se.nxv64i8.iXLen.nxv64i8.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -454,9 +904,99 @@ define @test_sf_vc_v_vvv_se_e16m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_se_e16m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr 
a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.se.nxv32i16.iXLen.nxv32i16.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -520,9 +1060,99 @@ define @test_sf_vc_v_vvv_se_e32m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_se_e32m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb 
+; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.se.nxv16i32.iXLen.nxv16i32.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -573,9 +1203,99 @@ define @test_sf_vc_v_vvv_se_e64m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_se_e64m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; 
CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.se.nxv8i64.iXLen.nxv8i64.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -665,9 +1385,99 @@ define @test_sf_vc_v_vvv_e8m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_e8m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, 
sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.nxv64i8.iXLen.nxv64i8.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -744,9 +1554,99 @@ define @test_sf_vc_v_vvv_e16m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_e16m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; 
CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.nxv32i16.iXLen.nxv32i16.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -810,9 +1710,99 @@ define @test_sf_vc_v_vvv_e32m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_e32m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 
* vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.nxv16i32.iXLen.nxv16i32.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) @@ -863,9 +1853,99 @@ define @test_sf_vc_v_vvv_e64m8( %vd, %vs2, %vs1, iXLen %vl) { ; CHECK-LABEL: test_sf_vc_v_vvv_e64m8: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 
3 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: sub a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v24, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: li a3, 6 +; CHECK-NEXT: mul a2, a2, a3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v25, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 2 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v26, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v27, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v28, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v29, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v30, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v31, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: sf.vc.v.vvv 3, v8, v16, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 3 +; CHECK-NEXT: sub a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v25, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 2 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v26, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v27, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v28, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v29, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v30, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v31, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call @llvm.riscv.sf.vc.v.vvv.nxv8i64.iXLen.nxv8i64.iXLen(iXLen 3, %vd, %vs2, %vs1, iXLen %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll --- 
a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-zero-vl.ll @@ -11,10 +11,54 @@ define @test_vlseg2_mask_nxv16i16(ptr %base, %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 2 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a2, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v4, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 1 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v5, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs1r.v v6, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu ; CHECK-NEXT: vlseg2e16.v v4, (a0) ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlseg2.nxv16i16( undef, undef, ptr %base, i64 0) @@ -30,10 +74,54 @@ define @test_vlsseg2_mask_nxv16i16(ptr %base, i64 %offset, %mask) { ; CHECK-LABEL: test_vlsseg2_mask_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1 ; CHECK-NEXT: vmv4r.v v8, v4 ; CHECK-NEXT: vlsseg2e16.v v4, (a0), a1, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 
+; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,} @llvm.riscv.vlsseg2.nxv16i16( undef, undef, ptr %base, i64 %offset, i64 0) @@ -88,10 +176,54 @@ define @test_vlseg2ff_nxv16i16(ptr %base, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, ma ; CHECK-NEXT: vlseg2e16ff.v v4, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.nxv16i16( undef, undef, ptr %base, i64 0) @@ -104,11 +236,55 @@ define @test_vlseg2ff_mask_nxv16i16( %val, ptr %base, %mask, ptr %outvl) { ; CHECK-LABEL: test_vlseg2ff_mask_nxv16i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi sp, sp, -16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 2 +; CHECK-NEXT: sub sp, sp, a2 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x04, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 4 * vlenb +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a3, a2, 1 +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v4, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: 
slli a2, a2, 1 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v5, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs1r.v v6, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv4r.v v4, v8 ; CHECK-NEXT: vsetivli zero, 0, e16, m4, ta, mu ; CHECK-NEXT: vlseg2e16ff.v v4, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sd a0, 0(a1) +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 1 +; CHECK-NEXT: add a0, a1, a0 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v4, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 1 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v5, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v6, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 2 +; CHECK-NEXT: add sp, sp, a0 +; CHECK-NEXT: addi sp, sp, 16 ; CHECK-NEXT: ret entry: %0 = tail call {,, i64} @llvm.riscv.vlseg2ff.mask.nxv16i16( %val, %val, ptr %base, %mask, i64 0, i64 1)
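All of the generated prologues above follow one layout: the frame grows by N * vlenb bytes on top of a fixed 16-byte slot (N = 8 for the v24-v31 saves, N = 4 for the v4-v7 saves in zvlsseg-zero-vl.ll), each vector register is saved into its own vlenb-sized slot, and the .cfi_escape string is the DWARF DW_CFA_def_cfa_expression for the matching CFA rule: breg2(sp), plus 16, plus N times vlenb read through DW_OP_bregx; the two escape strings in this patch differ only in the multiplier byte (0x08 vs. 0x04). Below is a minimal sketch of that slot arithmetic, assuming VLEN = 128 so vlenb = 16 bytes; it is an illustration to read alongside the checks, not code from the patch.

#include <cstdio>

// Sketch of the spill-slot arithmetic in the generated CHECK lines:
// v24 takes the highest slot (7 * vlenb) and v31 the lowest (0), all
// placed 16 bytes above the post-prologue stack pointer.
int main() {
  const unsigned vlenb = 16;              // assumption: VLEN = 128, i.e. vlenb = 16 bytes
  const unsigned frame = 16 + 8 * vlenb;  // ".cfi_escape ... # sp + 16 + 8 * vlenb"
  for (unsigned vreg = 24; vreg <= 31; ++vreg) {
    // e.g. for v24: "slli a3, a2, 3; sub a2, a3, a2" computes 8*vlenb - vlenb,
    // then "add a2, sp, a2; addi a2, a2, 16" forms the final slot address.
    unsigned offset = (31 - vreg) * vlenb + 16;
    std::printf("v%u -> sp + %u (frame size %u)\n", vreg, offset, frame);
  }
  return 0;
}

The same arithmetic with four slots covers the v4-v7 case, which is why those hunks shift by 2 instead of 3 when releasing the frame.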